diff options
Diffstat (limited to 'libjava/classpath/java/lang')
-rw-r--r-- | libjava/classpath/java/lang/Character.java | 2551 | ||||
-rw-r--r-- | libjava/classpath/java/lang/ClassNotFoundException.java | 5 | ||||
-rw-r--r-- | libjava/classpath/java/lang/Math.java | 351 | ||||
-rw-r--r-- | libjava/classpath/java/lang/String.java | 70 | ||||
-rw-r--r-- | libjava/classpath/java/lang/StringBuilder.java | 61 | ||||
-rw-r--r-- | libjava/classpath/java/lang/System.java | 17 | ||||
-rw-r--r-- | libjava/classpath/java/lang/Thread.java | 2 | ||||
-rw-r--r-- | libjava/classpath/java/lang/reflect/Proxy.java | 44 |
8 files changed, 2740 insertions, 361 deletions
diff --git a/libjava/classpath/java/lang/Character.java b/libjava/classpath/java/lang/Character.java index 3c88ff805c7..98ad147aaa3 100644 --- a/libjava/classpath/java/lang/Character.java +++ b/libjava/classpath/java/lang/Character.java @@ -41,6 +41,8 @@ package java.lang; import gnu.java.lang.CharData; import java.io.Serializable; +import java.text.Collator; +import java.util.Locale; /** * Wrapper class for the primitive char data type. In addition, this class @@ -133,10 +135,10 @@ public final class Character implements Serializable, Comparable * is in at most one of these blocks. * * This inner class was generated automatically from - * <code>doc/unicode/Block-3.txt</code>, by some perl scripts. + * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts. * This Unicode definition file can be found on the * <a href="http://www.unicode.org">http://www.unicode.org</a> website. - * JDK 1.4 uses Unicode version 3.0.0. + * JDK 1.5 uses Unicode version 4.0.0. * * @author scripts/unicode-blocks.pl (written by Eric Blake) * @since 1.2 @@ -144,10 +146,18 @@ public final class Character implements Serializable, Comparable public static final class UnicodeBlock extends Subset { /** The start of the subset. */ - private final char start; + private final int start; /** The end of the subset. */ - private final char end; + private final int end; + + /** The canonical name of the block according to the Unicode standard. */ + private final String canonicalName; + + /** Constants for the <code>forName()</code> method */ + private static final int CANONICAL_NAME = 0; + private static final int NO_SPACES_NAME = 1; + private static final int CONSTANT_NAME = 2; /** * Constructor for strictly defined blocks. @@ -155,25 +165,46 @@ public final class Character implements Serializable, Comparable * @param start the start character of the range * @param end the end character of the range * @param name the block name + * @param canonicalName the name of the block as defined in the Unicode + * standard. */ - private UnicodeBlock(char start, char end, String name) + private UnicodeBlock(int start, int end, String name, + String canonicalName) { super(name); this.start = start; this.end = end; + this.canonicalName = canonicalName; } /** * Returns the Unicode character block which a character belongs to. + * <strong>Note</strong>: This method does not support the use of + * supplementary characters. For such support, <code>of(int)</code> + * should be used instead. * * @param ch the character to look up * @return the set it belongs to, or null if it is not in one */ public static UnicodeBlock of(char ch) { - // Special case, since SPECIALS contains two ranges. - if (ch == '\uFEFF') - return SPECIALS; + return of((int) ch); + } + + /** + * Returns the Unicode character block which a code point belongs to. + * + * @param codePoint the character to look up + * @return the set it belongs to, or null if it is not in one. + * @throws IllegalArgumentException if the specified code point is + * invalid. + * @since 1.5 + */ + public static UnicodeBlock of(int codePoint) + { + if (codePoint > MAX_CODE_POINT) + throw new IllegalArgumentException("The supplied integer value is " + + "too large to be a codepoint."); // Simple binary search for the correct block. int low = 0; int hi = sets.length - 1; @@ -181,9 +212,9 @@ public final class Character implements Serializable, Comparable { int mid = (low + hi) >> 1; UnicodeBlock b = sets[mid]; - if (ch < b.start) + if (codePoint < b.start) hi = mid - 1; - else if (ch > b.end) + else if (codePoint > b.end) low = mid + 1; else return b; @@ -192,703 +223,1300 @@ public final class Character implements Serializable, Comparable } /** + * <p> + * Returns the <code>UnicodeBlock</code> with the given name, as defined + * by the Unicode standard. The version of Unicode in use is defined by + * the <code>Character</code> class, and the names are given in the + * <code>Blocks-<version>.txt</code> file corresponding to that version. + * The name may be specified in one of three ways: + * </p> + * <ol> + * <li>The canonical, human-readable name used by the Unicode standard. + * This is the name with all spaces and hyphens retained. For example, + * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li> + * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li> + * <li>The name used for the constants specified by this class, which + * is the canonical name with all spaces and hyphens replaced with + * underscores e.g. `BASIC_LATIN'</li> + * </ol> + * <p> + * The names are compared case-insensitively using the case comparison + * associated with the U.S. English locale. The method recognises the + * previous names used for blocks as well as the current ones. At + * present, this simply means that the deprecated `SURROGATES_AREA' + * will be recognised by this method (the <code>of()</code> methods + * only return one of the three new surrogate blocks). + * </p> + * + * @param blockName the name of the block to look up. + * @return the specified block. + * @throws NullPointerException if the <code>blockName</code> is + * <code>null</code>. + * @throws IllegalArgumentException if the name does not match any Unicode + * block. + * @since 1.5 + */ + public static final UnicodeBlock forName(String blockName) + { + int type; + if (blockName.indexOf(' ') != -1) + type = CANONICAL_NAME; + else if (blockName.indexOf('_') != -1) + type = CONSTANT_NAME; + else + type = NO_SPACES_NAME; + Collator usCollator = Collator.getInstance(Locale.US); + usCollator.setStrength(Collator.PRIMARY); + /* Special case for deprecated blocks not in sets */ + switch (type) + { + case CANONICAL_NAME: + if (usCollator.compare(blockName, "Surrogates Area") == 0) + return SURROGATES_AREA; + break; + case NO_SPACES_NAME: + if (usCollator.compare(blockName, "SurrogatesArea") == 0) + return SURROGATES_AREA; + break; + case CONSTANT_NAME: + if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) + return SURROGATES_AREA; + break; + } + /* Other cases */ + int setLength = sets.length; + switch (type) + { + case CANONICAL_NAME: + for (int i = 0; i < setLength; i++) + { + UnicodeBlock block = sets[i]; + if (usCollator.compare(blockName, block.canonicalName) == 0) + return block; + } + break; + case NO_SPACES_NAME: + for (int i = 0; i < setLength; i++) + { + UnicodeBlock block = sets[i]; + String nsName = block.canonicalName.replaceAll(" ",""); + if (usCollator.compare(blockName, nsName) == 0) + return block; + } + break; + case CONSTANT_NAME: + for (int i = 0; i < setLength; i++) + { + UnicodeBlock block = sets[i]; + if (usCollator.compare(blockName, block.toString()) == 0) + return block; + } + break; + } + throw new IllegalArgumentException("No Unicode block found for " + + blockName + "."); + } + + /** * Basic Latin. - * '\u0000' - '\u007F'. + * 0x0000 - 0x007F. */ public static final UnicodeBlock BASIC_LATIN - = new UnicodeBlock('\u0000', '\u007F', - "BASIC_LATIN"); + = new UnicodeBlock(0x0000, 0x007F, + "BASIC_LATIN", + "Basic Latin"); /** * Latin-1 Supplement. - * '\u0080' - '\u00FF'. + * 0x0080 - 0x00FF. */ public static final UnicodeBlock LATIN_1_SUPPLEMENT - = new UnicodeBlock('\u0080', '\u00FF', - "LATIN_1_SUPPLEMENT"); + = new UnicodeBlock(0x0080, 0x00FF, + "LATIN_1_SUPPLEMENT", + "Latin-1 Supplement"); /** * Latin Extended-A. - * '\u0100' - '\u017F'. + * 0x0100 - 0x017F. */ public static final UnicodeBlock LATIN_EXTENDED_A - = new UnicodeBlock('\u0100', '\u017F', - "LATIN_EXTENDED_A"); + = new UnicodeBlock(0x0100, 0x017F, + "LATIN_EXTENDED_A", + "Latin Extended-A"); /** * Latin Extended-B. - * '\u0180' - '\u024F'. + * 0x0180 - 0x024F. */ public static final UnicodeBlock LATIN_EXTENDED_B - = new UnicodeBlock('\u0180', '\u024F', - "LATIN_EXTENDED_B"); + = new UnicodeBlock(0x0180, 0x024F, + "LATIN_EXTENDED_B", + "Latin Extended-B"); /** * IPA Extensions. - * '\u0250' - '\u02AF'. + * 0x0250 - 0x02AF. */ public static final UnicodeBlock IPA_EXTENSIONS - = new UnicodeBlock('\u0250', '\u02AF', - "IPA_EXTENSIONS"); + = new UnicodeBlock(0x0250, 0x02AF, + "IPA_EXTENSIONS", + "IPA Extensions"); /** * Spacing Modifier Letters. - * '\u02B0' - '\u02FF'. + * 0x02B0 - 0x02FF. */ public static final UnicodeBlock SPACING_MODIFIER_LETTERS - = new UnicodeBlock('\u02B0', '\u02FF', - "SPACING_MODIFIER_LETTERS"); + = new UnicodeBlock(0x02B0, 0x02FF, + "SPACING_MODIFIER_LETTERS", + "Spacing Modifier Letters"); /** * Combining Diacritical Marks. - * '\u0300' - '\u036F'. + * 0x0300 - 0x036F. */ public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS - = new UnicodeBlock('\u0300', '\u036F', - "COMBINING_DIACRITICAL_MARKS"); + = new UnicodeBlock(0x0300, 0x036F, + "COMBINING_DIACRITICAL_MARKS", + "Combining Diacritical Marks"); /** * Greek. - * '\u0370' - '\u03FF'. + * 0x0370 - 0x03FF. */ public static final UnicodeBlock GREEK - = new UnicodeBlock('\u0370', '\u03FF', - "GREEK"); + = new UnicodeBlock(0x0370, 0x03FF, + "GREEK", + "Greek"); /** * Cyrillic. - * '\u0400' - '\u04FF'. + * 0x0400 - 0x04FF. */ public static final UnicodeBlock CYRILLIC - = new UnicodeBlock('\u0400', '\u04FF', - "CYRILLIC"); + = new UnicodeBlock(0x0400, 0x04FF, + "CYRILLIC", + "Cyrillic"); + + /** + * Cyrillic Supplementary. + * 0x0500 - 0x052F. + * @since 1.5 + */ + public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY + = new UnicodeBlock(0x0500, 0x052F, + "CYRILLIC_SUPPLEMENTARY", + "Cyrillic Supplementary"); /** * Armenian. - * '\u0530' - '\u058F'. + * 0x0530 - 0x058F. */ public static final UnicodeBlock ARMENIAN - = new UnicodeBlock('\u0530', '\u058F', - "ARMENIAN"); + = new UnicodeBlock(0x0530, 0x058F, + "ARMENIAN", + "Armenian"); /** * Hebrew. - * '\u0590' - '\u05FF'. + * 0x0590 - 0x05FF. */ public static final UnicodeBlock HEBREW - = new UnicodeBlock('\u0590', '\u05FF', - "HEBREW"); + = new UnicodeBlock(0x0590, 0x05FF, + "HEBREW", + "Hebrew"); /** * Arabic. - * '\u0600' - '\u06FF'. + * 0x0600 - 0x06FF. */ public static final UnicodeBlock ARABIC - = new UnicodeBlock('\u0600', '\u06FF', - "ARABIC"); + = new UnicodeBlock(0x0600, 0x06FF, + "ARABIC", + "Arabic"); /** * Syriac. - * '\u0700' - '\u074F'. + * 0x0700 - 0x074F. * @since 1.4 */ public static final UnicodeBlock SYRIAC - = new UnicodeBlock('\u0700', '\u074F', - "SYRIAC"); + = new UnicodeBlock(0x0700, 0x074F, + "SYRIAC", + "Syriac"); /** * Thaana. - * '\u0780' - '\u07BF'. + * 0x0780 - 0x07BF. * @since 1.4 */ public static final UnicodeBlock THAANA - = new UnicodeBlock('\u0780', '\u07BF', - "THAANA"); + = new UnicodeBlock(0x0780, 0x07BF, + "THAANA", + "Thaana"); /** * Devanagari. - * '\u0900' - '\u097F'. + * 0x0900 - 0x097F. */ public static final UnicodeBlock DEVANAGARI - = new UnicodeBlock('\u0900', '\u097F', - "DEVANAGARI"); + = new UnicodeBlock(0x0900, 0x097F, + "DEVANAGARI", + "Devanagari"); /** * Bengali. - * '\u0980' - '\u09FF'. + * 0x0980 - 0x09FF. */ public static final UnicodeBlock BENGALI - = new UnicodeBlock('\u0980', '\u09FF', - "BENGALI"); + = new UnicodeBlock(0x0980, 0x09FF, + "BENGALI", + "Bengali"); /** * Gurmukhi. - * '\u0A00' - '\u0A7F'. + * 0x0A00 - 0x0A7F. */ public static final UnicodeBlock GURMUKHI - = new UnicodeBlock('\u0A00', '\u0A7F', - "GURMUKHI"); + = new UnicodeBlock(0x0A00, 0x0A7F, + "GURMUKHI", + "Gurmukhi"); /** * Gujarati. - * '\u0A80' - '\u0AFF'. + * 0x0A80 - 0x0AFF. */ public static final UnicodeBlock GUJARATI - = new UnicodeBlock('\u0A80', '\u0AFF', - "GUJARATI"); + = new UnicodeBlock(0x0A80, 0x0AFF, + "GUJARATI", + "Gujarati"); /** * Oriya. - * '\u0B00' - '\u0B7F'. + * 0x0B00 - 0x0B7F. */ public static final UnicodeBlock ORIYA - = new UnicodeBlock('\u0B00', '\u0B7F', - "ORIYA"); + = new UnicodeBlock(0x0B00, 0x0B7F, + "ORIYA", + "Oriya"); /** * Tamil. - * '\u0B80' - '\u0BFF'. + * 0x0B80 - 0x0BFF. */ public static final UnicodeBlock TAMIL - = new UnicodeBlock('\u0B80', '\u0BFF', - "TAMIL"); + = new UnicodeBlock(0x0B80, 0x0BFF, + "TAMIL", + "Tamil"); /** * Telugu. - * '\u0C00' - '\u0C7F'. + * 0x0C00 - 0x0C7F. */ public static final UnicodeBlock TELUGU - = new UnicodeBlock('\u0C00', '\u0C7F', - "TELUGU"); + = new UnicodeBlock(0x0C00, 0x0C7F, + "TELUGU", + "Telugu"); /** * Kannada. - * '\u0C80' - '\u0CFF'. + * 0x0C80 - 0x0CFF. */ public static final UnicodeBlock KANNADA - = new UnicodeBlock('\u0C80', '\u0CFF', - "KANNADA"); + = new UnicodeBlock(0x0C80, 0x0CFF, + "KANNADA", + "Kannada"); /** * Malayalam. - * '\u0D00' - '\u0D7F'. + * 0x0D00 - 0x0D7F. */ public static final UnicodeBlock MALAYALAM - = new UnicodeBlock('\u0D00', '\u0D7F', - "MALAYALAM"); + = new UnicodeBlock(0x0D00, 0x0D7F, + "MALAYALAM", + "Malayalam"); /** * Sinhala. - * '\u0D80' - '\u0DFF'. + * 0x0D80 - 0x0DFF. * @since 1.4 */ public static final UnicodeBlock SINHALA - = new UnicodeBlock('\u0D80', '\u0DFF', - "SINHALA"); + = new UnicodeBlock(0x0D80, 0x0DFF, + "SINHALA", + "Sinhala"); /** * Thai. - * '\u0E00' - '\u0E7F'. + * 0x0E00 - 0x0E7F. */ public static final UnicodeBlock THAI - = new UnicodeBlock('\u0E00', '\u0E7F', - "THAI"); + = new UnicodeBlock(0x0E00, 0x0E7F, + "THAI", + "Thai"); /** * Lao. - * '\u0E80' - '\u0EFF'. + * 0x0E80 - 0x0EFF. */ public static final UnicodeBlock LAO - = new UnicodeBlock('\u0E80', '\u0EFF', - "LAO"); + = new UnicodeBlock(0x0E80, 0x0EFF, + "LAO", + "Lao"); /** * Tibetan. - * '\u0F00' - '\u0FFF'. + * 0x0F00 - 0x0FFF. */ public static final UnicodeBlock TIBETAN - = new UnicodeBlock('\u0F00', '\u0FFF', - "TIBETAN"); + = new UnicodeBlock(0x0F00, 0x0FFF, + "TIBETAN", + "Tibetan"); /** * Myanmar. - * '\u1000' - '\u109F'. + * 0x1000 - 0x109F. * @since 1.4 */ public static final UnicodeBlock MYANMAR - = new UnicodeBlock('\u1000', '\u109F', - "MYANMAR"); + = new UnicodeBlock(0x1000, 0x109F, + "MYANMAR", + "Myanmar"); /** * Georgian. - * '\u10A0' - '\u10FF'. + * 0x10A0 - 0x10FF. */ public static final UnicodeBlock GEORGIAN - = new UnicodeBlock('\u10A0', '\u10FF', - "GEORGIAN"); + = new UnicodeBlock(0x10A0, 0x10FF, + "GEORGIAN", + "Georgian"); /** * Hangul Jamo. - * '\u1100' - '\u11FF'. + * 0x1100 - 0x11FF. */ public static final UnicodeBlock HANGUL_JAMO - = new UnicodeBlock('\u1100', '\u11FF', - "HANGUL_JAMO"); + = new UnicodeBlock(0x1100, 0x11FF, + "HANGUL_JAMO", + "Hangul Jamo"); /** * Ethiopic. - * '\u1200' - '\u137F'. + * 0x1200 - 0x137F. * @since 1.4 */ public static final UnicodeBlock ETHIOPIC - = new UnicodeBlock('\u1200', '\u137F', - "ETHIOPIC"); + = new UnicodeBlock(0x1200, 0x137F, + "ETHIOPIC", + "Ethiopic"); /** * Cherokee. - * '\u13A0' - '\u13FF'. + * 0x13A0 - 0x13FF. * @since 1.4 */ public static final UnicodeBlock CHEROKEE - = new UnicodeBlock('\u13A0', '\u13FF', - "CHEROKEE"); + = new UnicodeBlock(0x13A0, 0x13FF, + "CHEROKEE", + "Cherokee"); /** * Unified Canadian Aboriginal Syllabics. - * '\u1400' - '\u167F'. + * 0x1400 - 0x167F. * @since 1.4 */ public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS - = new UnicodeBlock('\u1400', '\u167F', - "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); + = new UnicodeBlock(0x1400, 0x167F, + "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", + "Unified Canadian Aboriginal Syllabics"); /** * Ogham. - * '\u1680' - '\u169F'. + * 0x1680 - 0x169F. * @since 1.4 */ public static final UnicodeBlock OGHAM - = new UnicodeBlock('\u1680', '\u169F', - "OGHAM"); + = new UnicodeBlock(0x1680, 0x169F, + "OGHAM", + "Ogham"); /** * Runic. - * '\u16A0' - '\u16FF'. + * 0x16A0 - 0x16FF. * @since 1.4 */ public static final UnicodeBlock RUNIC - = new UnicodeBlock('\u16A0', '\u16FF', - "RUNIC"); + = new UnicodeBlock(0x16A0, 0x16FF, + "RUNIC", + "Runic"); + + /** + * Tagalog. + * 0x1700 - 0x171F. + * @since 1.5 + */ + public static final UnicodeBlock TAGALOG + = new UnicodeBlock(0x1700, 0x171F, + "TAGALOG", + "Tagalog"); + + /** + * Hanunoo. + * 0x1720 - 0x173F. + * @since 1.5 + */ + public static final UnicodeBlock HANUNOO + = new UnicodeBlock(0x1720, 0x173F, + "HANUNOO", + "Hanunoo"); + + /** + * Buhid. + * 0x1740 - 0x175F. + * @since 1.5 + */ + public static final UnicodeBlock BUHID + = new UnicodeBlock(0x1740, 0x175F, + "BUHID", + "Buhid"); + + /** + * Tagbanwa. + * 0x1760 - 0x177F. + * @since 1.5 + */ + public static final UnicodeBlock TAGBANWA + = new UnicodeBlock(0x1760, 0x177F, + "TAGBANWA", + "Tagbanwa"); /** * Khmer. - * '\u1780' - '\u17FF'. + * 0x1780 - 0x17FF. * @since 1.4 */ public static final UnicodeBlock KHMER - = new UnicodeBlock('\u1780', '\u17FF', - "KHMER"); + = new UnicodeBlock(0x1780, 0x17FF, + "KHMER", + "Khmer"); /** * Mongolian. - * '\u1800' - '\u18AF'. + * 0x1800 - 0x18AF. * @since 1.4 */ public static final UnicodeBlock MONGOLIAN - = new UnicodeBlock('\u1800', '\u18AF', - "MONGOLIAN"); + = new UnicodeBlock(0x1800, 0x18AF, + "MONGOLIAN", + "Mongolian"); + + /** + * Limbu. + * 0x1900 - 0x194F. + * @since 1.5 + */ + public static final UnicodeBlock LIMBU + = new UnicodeBlock(0x1900, 0x194F, + "LIMBU", + "Limbu"); + + /** + * Tai Le. + * 0x1950 - 0x197F. + * @since 1.5 + */ + public static final UnicodeBlock TAI_LE + = new UnicodeBlock(0x1950, 0x197F, + "TAI_LE", + "Tai Le"); + + /** + * Khmer Symbols. + * 0x19E0 - 0x19FF. + * @since 1.5 + */ + public static final UnicodeBlock KHMER_SYMBOLS + = new UnicodeBlock(0x19E0, 0x19FF, + "KHMER_SYMBOLS", + "Khmer Symbols"); + + /** + * Phonetic Extensions. + * 0x1D00 - 0x1D7F. + * @since 1.5 + */ + public static final UnicodeBlock PHONETIC_EXTENSIONS + = new UnicodeBlock(0x1D00, 0x1D7F, + "PHONETIC_EXTENSIONS", + "Phonetic Extensions"); /** * Latin Extended Additional. - * '\u1E00' - '\u1EFF'. + * 0x1E00 - 0x1EFF. */ public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL - = new UnicodeBlock('\u1E00', '\u1EFF', - "LATIN_EXTENDED_ADDITIONAL"); + = new UnicodeBlock(0x1E00, 0x1EFF, + "LATIN_EXTENDED_ADDITIONAL", + "Latin Extended Additional"); /** * Greek Extended. - * '\u1F00' - '\u1FFF'. + * 0x1F00 - 0x1FFF. */ public static final UnicodeBlock GREEK_EXTENDED - = new UnicodeBlock('\u1F00', '\u1FFF', - "GREEK_EXTENDED"); + = new UnicodeBlock(0x1F00, 0x1FFF, + "GREEK_EXTENDED", + "Greek Extended"); /** * General Punctuation. - * '\u2000' - '\u206F'. + * 0x2000 - 0x206F. */ public static final UnicodeBlock GENERAL_PUNCTUATION - = new UnicodeBlock('\u2000', '\u206F', - "GENERAL_PUNCTUATION"); + = new UnicodeBlock(0x2000, 0x206F, + "GENERAL_PUNCTUATION", + "General Punctuation"); /** * Superscripts and Subscripts. - * '\u2070' - '\u209F'. + * 0x2070 - 0x209F. */ public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS - = new UnicodeBlock('\u2070', '\u209F', - "SUPERSCRIPTS_AND_SUBSCRIPTS"); + = new UnicodeBlock(0x2070, 0x209F, + "SUPERSCRIPTS_AND_SUBSCRIPTS", + "Superscripts and Subscripts"); /** * Currency Symbols. - * '\u20A0' - '\u20CF'. + * 0x20A0 - 0x20CF. */ public static final UnicodeBlock CURRENCY_SYMBOLS - = new UnicodeBlock('\u20A0', '\u20CF', - "CURRENCY_SYMBOLS"); + = new UnicodeBlock(0x20A0, 0x20CF, + "CURRENCY_SYMBOLS", + "Currency Symbols"); /** * Combining Marks for Symbols. - * '\u20D0' - '\u20FF'. + * 0x20D0 - 0x20FF. */ public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS - = new UnicodeBlock('\u20D0', '\u20FF', - "COMBINING_MARKS_FOR_SYMBOLS"); + = new UnicodeBlock(0x20D0, 0x20FF, + "COMBINING_MARKS_FOR_SYMBOLS", + "Combining Marks for Symbols"); /** * Letterlike Symbols. - * '\u2100' - '\u214F'. + * 0x2100 - 0x214F. */ public static final UnicodeBlock LETTERLIKE_SYMBOLS - = new UnicodeBlock('\u2100', '\u214F', - "LETTERLIKE_SYMBOLS"); + = new UnicodeBlock(0x2100, 0x214F, + "LETTERLIKE_SYMBOLS", + "Letterlike Symbols"); /** * Number Forms. - * '\u2150' - '\u218F'. + * 0x2150 - 0x218F. */ public static final UnicodeBlock NUMBER_FORMS - = new UnicodeBlock('\u2150', '\u218F', - "NUMBER_FORMS"); + = new UnicodeBlock(0x2150, 0x218F, + "NUMBER_FORMS", + "Number Forms"); /** * Arrows. - * '\u2190' - '\u21FF'. + * 0x2190 - 0x21FF. */ public static final UnicodeBlock ARROWS - = new UnicodeBlock('\u2190', '\u21FF', - "ARROWS"); + = new UnicodeBlock(0x2190, 0x21FF, + "ARROWS", + "Arrows"); /** * Mathematical Operators. - * '\u2200' - '\u22FF'. + * 0x2200 - 0x22FF. */ public static final UnicodeBlock MATHEMATICAL_OPERATORS - = new UnicodeBlock('\u2200', '\u22FF', - "MATHEMATICAL_OPERATORS"); + = new UnicodeBlock(0x2200, 0x22FF, + "MATHEMATICAL_OPERATORS", + "Mathematical Operators"); /** * Miscellaneous Technical. - * '\u2300' - '\u23FF'. + * 0x2300 - 0x23FF. */ public static final UnicodeBlock MISCELLANEOUS_TECHNICAL - = new UnicodeBlock('\u2300', '\u23FF', - "MISCELLANEOUS_TECHNICAL"); + = new UnicodeBlock(0x2300, 0x23FF, + "MISCELLANEOUS_TECHNICAL", + "Miscellaneous Technical"); /** * Control Pictures. - * '\u2400' - '\u243F'. + * 0x2400 - 0x243F. */ public static final UnicodeBlock CONTROL_PICTURES - = new UnicodeBlock('\u2400', '\u243F', - "CONTROL_PICTURES"); + = new UnicodeBlock(0x2400, 0x243F, + "CONTROL_PICTURES", + "Control Pictures"); /** * Optical Character Recognition. - * '\u2440' - '\u245F'. + * 0x2440 - 0x245F. */ public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION - = new UnicodeBlock('\u2440', '\u245F', - "OPTICAL_CHARACTER_RECOGNITION"); + = new UnicodeBlock(0x2440, 0x245F, + "OPTICAL_CHARACTER_RECOGNITION", + "Optical Character Recognition"); /** * Enclosed Alphanumerics. - * '\u2460' - '\u24FF'. + * 0x2460 - 0x24FF. */ public static final UnicodeBlock ENCLOSED_ALPHANUMERICS - = new UnicodeBlock('\u2460', '\u24FF', - "ENCLOSED_ALPHANUMERICS"); + = new UnicodeBlock(0x2460, 0x24FF, + "ENCLOSED_ALPHANUMERICS", + "Enclosed Alphanumerics"); /** * Box Drawing. - * '\u2500' - '\u257F'. + * 0x2500 - 0x257F. */ public static final UnicodeBlock BOX_DRAWING - = new UnicodeBlock('\u2500', '\u257F', - "BOX_DRAWING"); + = new UnicodeBlock(0x2500, 0x257F, + "BOX_DRAWING", + "Box Drawing"); /** * Block Elements. - * '\u2580' - '\u259F'. + * 0x2580 - 0x259F. */ public static final UnicodeBlock BLOCK_ELEMENTS - = new UnicodeBlock('\u2580', '\u259F', - "BLOCK_ELEMENTS"); + = new UnicodeBlock(0x2580, 0x259F, + "BLOCK_ELEMENTS", + "Block Elements"); /** * Geometric Shapes. - * '\u25A0' - '\u25FF'. + * 0x25A0 - 0x25FF. */ public static final UnicodeBlock GEOMETRIC_SHAPES - = new UnicodeBlock('\u25A0', '\u25FF', - "GEOMETRIC_SHAPES"); + = new UnicodeBlock(0x25A0, 0x25FF, + "GEOMETRIC_SHAPES", + "Geometric Shapes"); /** * Miscellaneous Symbols. - * '\u2600' - '\u26FF'. + * 0x2600 - 0x26FF. */ public static final UnicodeBlock MISCELLANEOUS_SYMBOLS - = new UnicodeBlock('\u2600', '\u26FF', - "MISCELLANEOUS_SYMBOLS"); + = new UnicodeBlock(0x2600, 0x26FF, + "MISCELLANEOUS_SYMBOLS", + "Miscellaneous Symbols"); /** * Dingbats. - * '\u2700' - '\u27BF'. + * 0x2700 - 0x27BF. */ public static final UnicodeBlock DINGBATS - = new UnicodeBlock('\u2700', '\u27BF', - "DINGBATS"); + = new UnicodeBlock(0x2700, 0x27BF, + "DINGBATS", + "Dingbats"); + + /** + * Miscellaneous Mathematical Symbols-A. + * 0x27C0 - 0x27EF. + * @since 1.5 + */ + public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A + = new UnicodeBlock(0x27C0, 0x27EF, + "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", + "Miscellaneous Mathematical Symbols-A"); + + /** + * Supplemental Arrows-A. + * 0x27F0 - 0x27FF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A + = new UnicodeBlock(0x27F0, 0x27FF, + "SUPPLEMENTAL_ARROWS_A", + "Supplemental Arrows-A"); /** * Braille Patterns. - * '\u2800' - '\u28FF'. + * 0x2800 - 0x28FF. * @since 1.4 */ public static final UnicodeBlock BRAILLE_PATTERNS - = new UnicodeBlock('\u2800', '\u28FF', - "BRAILLE_PATTERNS"); + = new UnicodeBlock(0x2800, 0x28FF, + "BRAILLE_PATTERNS", + "Braille Patterns"); + + /** + * Supplemental Arrows-B. + * 0x2900 - 0x297F. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B + = new UnicodeBlock(0x2900, 0x297F, + "SUPPLEMENTAL_ARROWS_B", + "Supplemental Arrows-B"); + + /** + * Miscellaneous Mathematical Symbols-B. + * 0x2980 - 0x29FF. + * @since 1.5 + */ + public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B + = new UnicodeBlock(0x2980, 0x29FF, + "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", + "Miscellaneous Mathematical Symbols-B"); + + /** + * Supplemental Mathematical Operators. + * 0x2A00 - 0x2AFF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS + = new UnicodeBlock(0x2A00, 0x2AFF, + "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", + "Supplemental Mathematical Operators"); + + /** + * Miscellaneous Symbols and Arrows. + * 0x2B00 - 0x2BFF. + * @since 1.5 + */ + public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS + = new UnicodeBlock(0x2B00, 0x2BFF, + "MISCELLANEOUS_SYMBOLS_AND_ARROWS", + "Miscellaneous Symbols and Arrows"); /** * CJK Radicals Supplement. - * '\u2E80' - '\u2EFF'. + * 0x2E80 - 0x2EFF. * @since 1.4 */ public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT - = new UnicodeBlock('\u2E80', '\u2EFF', - "CJK_RADICALS_SUPPLEMENT"); + = new UnicodeBlock(0x2E80, 0x2EFF, + "CJK_RADICALS_SUPPLEMENT", + "CJK Radicals Supplement"); /** * Kangxi Radicals. - * '\u2F00' - '\u2FDF'. + * 0x2F00 - 0x2FDF. * @since 1.4 */ public static final UnicodeBlock KANGXI_RADICALS - = new UnicodeBlock('\u2F00', '\u2FDF', - "KANGXI_RADICALS"); + = new UnicodeBlock(0x2F00, 0x2FDF, + "KANGXI_RADICALS", + "Kangxi Radicals"); /** * Ideographic Description Characters. - * '\u2FF0' - '\u2FFF'. + * 0x2FF0 - 0x2FFF. * @since 1.4 */ public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS - = new UnicodeBlock('\u2FF0', '\u2FFF', - "IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); + = new UnicodeBlock(0x2FF0, 0x2FFF, + "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", + "Ideographic Description Characters"); /** * CJK Symbols and Punctuation. - * '\u3000' - '\u303F'. + * 0x3000 - 0x303F. */ public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION - = new UnicodeBlock('\u3000', '\u303F', - "CJK_SYMBOLS_AND_PUNCTUATION"); + = new UnicodeBlock(0x3000, 0x303F, + "CJK_SYMBOLS_AND_PUNCTUATION", + "CJK Symbols and Punctuation"); /** * Hiragana. - * '\u3040' - '\u309F'. + * 0x3040 - 0x309F. */ public static final UnicodeBlock HIRAGANA - = new UnicodeBlock('\u3040', '\u309F', - "HIRAGANA"); + = new UnicodeBlock(0x3040, 0x309F, + "HIRAGANA", + "Hiragana"); /** * Katakana. - * '\u30A0' - '\u30FF'. + * 0x30A0 - 0x30FF. */ public static final UnicodeBlock KATAKANA - = new UnicodeBlock('\u30A0', '\u30FF', - "KATAKANA"); + = new UnicodeBlock(0x30A0, 0x30FF, + "KATAKANA", + "Katakana"); /** * Bopomofo. - * '\u3100' - '\u312F'. + * 0x3100 - 0x312F. */ public static final UnicodeBlock BOPOMOFO - = new UnicodeBlock('\u3100', '\u312F', - "BOPOMOFO"); + = new UnicodeBlock(0x3100, 0x312F, + "BOPOMOFO", + "Bopomofo"); /** * Hangul Compatibility Jamo. - * '\u3130' - '\u318F'. + * 0x3130 - 0x318F. */ public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO - = new UnicodeBlock('\u3130', '\u318F', - "HANGUL_COMPATIBILITY_JAMO"); + = new UnicodeBlock(0x3130, 0x318F, + "HANGUL_COMPATIBILITY_JAMO", + "Hangul Compatibility Jamo"); /** * Kanbun. - * '\u3190' - '\u319F'. + * 0x3190 - 0x319F. */ public static final UnicodeBlock KANBUN - = new UnicodeBlock('\u3190', '\u319F', - "KANBUN"); + = new UnicodeBlock(0x3190, 0x319F, + "KANBUN", + "Kanbun"); /** * Bopomofo Extended. - * '\u31A0' - '\u31BF'. + * 0x31A0 - 0x31BF. * @since 1.4 */ public static final UnicodeBlock BOPOMOFO_EXTENDED - = new UnicodeBlock('\u31A0', '\u31BF', - "BOPOMOFO_EXTENDED"); + = new UnicodeBlock(0x31A0, 0x31BF, + "BOPOMOFO_EXTENDED", + "Bopomofo Extended"); + + /** + * Katakana Phonetic Extensions. + * 0x31F0 - 0x31FF. + * @since 1.5 + */ + public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS + = new UnicodeBlock(0x31F0, 0x31FF, + "KATAKANA_PHONETIC_EXTENSIONS", + "Katakana Phonetic Extensions"); /** * Enclosed CJK Letters and Months. - * '\u3200' - '\u32FF'. + * 0x3200 - 0x32FF. */ public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS - = new UnicodeBlock('\u3200', '\u32FF', - "ENCLOSED_CJK_LETTERS_AND_MONTHS"); + = new UnicodeBlock(0x3200, 0x32FF, + "ENCLOSED_CJK_LETTERS_AND_MONTHS", + "Enclosed CJK Letters and Months"); /** * CJK Compatibility. - * '\u3300' - '\u33FF'. + * 0x3300 - 0x33FF. */ public static final UnicodeBlock CJK_COMPATIBILITY - = new UnicodeBlock('\u3300', '\u33FF', - "CJK_COMPATIBILITY"); + = new UnicodeBlock(0x3300, 0x33FF, + "CJK_COMPATIBILITY", + "CJK Compatibility"); /** * CJK Unified Ideographs Extension A. - * '\u3400' - '\u4DB5'. + * 0x3400 - 0x4DBF. * @since 1.4 */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A - = new UnicodeBlock('\u3400', '\u4DB5', - "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); + = new UnicodeBlock(0x3400, 0x4DBF, + "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", + "CJK Unified Ideographs Extension A"); + + /** + * Yijing Hexagram Symbols. + * 0x4DC0 - 0x4DFF. + * @since 1.5 + */ + public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS + = new UnicodeBlock(0x4DC0, 0x4DFF, + "YIJING_HEXAGRAM_SYMBOLS", + "Yijing Hexagram Symbols"); /** * CJK Unified Ideographs. - * '\u4E00' - '\u9FFF'. + * 0x4E00 - 0x9FFF. */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS - = new UnicodeBlock('\u4E00', '\u9FFF', - "CJK_UNIFIED_IDEOGRAPHS"); + = new UnicodeBlock(0x4E00, 0x9FFF, + "CJK_UNIFIED_IDEOGRAPHS", + "CJK Unified Ideographs"); /** * Yi Syllables. - * '\uA000' - '\uA48F'. + * 0xA000 - 0xA48F. * @since 1.4 */ public static final UnicodeBlock YI_SYLLABLES - = new UnicodeBlock('\uA000', '\uA48F', - "YI_SYLLABLES"); + = new UnicodeBlock(0xA000, 0xA48F, + "YI_SYLLABLES", + "Yi Syllables"); /** * Yi Radicals. - * '\uA490' - '\uA4CF'. + * 0xA490 - 0xA4CF. * @since 1.4 */ public static final UnicodeBlock YI_RADICALS - = new UnicodeBlock('\uA490', '\uA4CF', - "YI_RADICALS"); + = new UnicodeBlock(0xA490, 0xA4CF, + "YI_RADICALS", + "Yi Radicals"); /** * Hangul Syllables. - * '\uAC00' - '\uD7A3'. + * 0xAC00 - 0xD7AF. */ public static final UnicodeBlock HANGUL_SYLLABLES - = new UnicodeBlock('\uAC00', '\uD7A3', - "HANGUL_SYLLABLES"); + = new UnicodeBlock(0xAC00, 0xD7AF, + "HANGUL_SYLLABLES", + "Hangul Syllables"); /** - * Surrogates Area. - * '\uD800' - '\uDFFF'. + * High Surrogates. + * 0xD800 - 0xDB7F. + * @since 1.5 */ - public static final UnicodeBlock SURROGATES_AREA - = new UnicodeBlock('\uD800', '\uDFFF', - "SURROGATES_AREA"); + public static final UnicodeBlock HIGH_SURROGATES + = new UnicodeBlock(0xD800, 0xDB7F, + "HIGH_SURROGATES", + "High Surrogates"); + + /** + * High Private Use Surrogates. + * 0xDB80 - 0xDBFF. + * @since 1.5 + */ + public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES + = new UnicodeBlock(0xDB80, 0xDBFF, + "HIGH_PRIVATE_USE_SURROGATES", + "High Private Use Surrogates"); + + /** + * Low Surrogates. + * 0xDC00 - 0xDFFF. + * @since 1.5 + */ + public static final UnicodeBlock LOW_SURROGATES + = new UnicodeBlock(0xDC00, 0xDFFF, + "LOW_SURROGATES", + "Low Surrogates"); /** * Private Use Area. - * '\uE000' - '\uF8FF'. + * 0xE000 - 0xF8FF. */ public static final UnicodeBlock PRIVATE_USE_AREA - = new UnicodeBlock('\uE000', '\uF8FF', - "PRIVATE_USE_AREA"); + = new UnicodeBlock(0xE000, 0xF8FF, + "PRIVATE_USE_AREA", + "Private Use Area"); /** * CJK Compatibility Ideographs. - * '\uF900' - '\uFAFF'. + * 0xF900 - 0xFAFF. */ public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS - = new UnicodeBlock('\uF900', '\uFAFF', - "CJK_COMPATIBILITY_IDEOGRAPHS"); + = new UnicodeBlock(0xF900, 0xFAFF, + "CJK_COMPATIBILITY_IDEOGRAPHS", + "CJK Compatibility Ideographs"); /** * Alphabetic Presentation Forms. - * '\uFB00' - '\uFB4F'. + * 0xFB00 - 0xFB4F. */ public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS - = new UnicodeBlock('\uFB00', '\uFB4F', - "ALPHABETIC_PRESENTATION_FORMS"); + = new UnicodeBlock(0xFB00, 0xFB4F, + "ALPHABETIC_PRESENTATION_FORMS", + "Alphabetic Presentation Forms"); /** * Arabic Presentation Forms-A. - * '\uFB50' - '\uFDFF'. + * 0xFB50 - 0xFDFF. */ public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A - = new UnicodeBlock('\uFB50', '\uFDFF', - "ARABIC_PRESENTATION_FORMS_A"); + = new UnicodeBlock(0xFB50, 0xFDFF, + "ARABIC_PRESENTATION_FORMS_A", + "Arabic Presentation Forms-A"); + + /** + * Variation Selectors. + * 0xFE00 - 0xFE0F. + * @since 1.5 + */ + public static final UnicodeBlock VARIATION_SELECTORS + = new UnicodeBlock(0xFE00, 0xFE0F, + "VARIATION_SELECTORS", + "Variation Selectors"); /** * Combining Half Marks. - * '\uFE20' - '\uFE2F'. + * 0xFE20 - 0xFE2F. */ public static final UnicodeBlock COMBINING_HALF_MARKS - = new UnicodeBlock('\uFE20', '\uFE2F', - "COMBINING_HALF_MARKS"); + = new UnicodeBlock(0xFE20, 0xFE2F, + "COMBINING_HALF_MARKS", + "Combining Half Marks"); /** * CJK Compatibility Forms. - * '\uFE30' - '\uFE4F'. + * 0xFE30 - 0xFE4F. */ public static final UnicodeBlock CJK_COMPATIBILITY_FORMS - = new UnicodeBlock('\uFE30', '\uFE4F', - "CJK_COMPATIBILITY_FORMS"); + = new UnicodeBlock(0xFE30, 0xFE4F, + "CJK_COMPATIBILITY_FORMS", + "CJK Compatibility Forms"); /** * Small Form Variants. - * '\uFE50' - '\uFE6F'. + * 0xFE50 - 0xFE6F. */ public static final UnicodeBlock SMALL_FORM_VARIANTS - = new UnicodeBlock('\uFE50', '\uFE6F', - "SMALL_FORM_VARIANTS"); + = new UnicodeBlock(0xFE50, 0xFE6F, + "SMALL_FORM_VARIANTS", + "Small Form Variants"); /** * Arabic Presentation Forms-B. - * '\uFE70' - '\uFEFE'. + * 0xFE70 - 0xFEFF. */ public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B - = new UnicodeBlock('\uFE70', '\uFEFE', - "ARABIC_PRESENTATION_FORMS_B"); + = new UnicodeBlock(0xFE70, 0xFEFF, + "ARABIC_PRESENTATION_FORMS_B", + "Arabic Presentation Forms-B"); /** * Halfwidth and Fullwidth Forms. - * '\uFF00' - '\uFFEF'. + * 0xFF00 - 0xFFEF. */ public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS - = new UnicodeBlock('\uFF00', '\uFFEF', - "HALFWIDTH_AND_FULLWIDTH_FORMS"); + = new UnicodeBlock(0xFF00, 0xFFEF, + "HALFWIDTH_AND_FULLWIDTH_FORMS", + "Halfwidth and Fullwidth Forms"); /** * Specials. - * '\uFEFF', '\uFFF0' - '\uFFFD'. + * 0xFFF0 - 0xFFFF. */ public static final UnicodeBlock SPECIALS - = new UnicodeBlock('\uFFF0', '\uFFFD', - "SPECIALS"); + = new UnicodeBlock(0xFFF0, 0xFFFF, + "SPECIALS", + "Specials"); + + /** + * Linear B Syllabary. + * 0x10000 - 0x1007F. + * @since 1.5 + */ + public static final UnicodeBlock LINEAR_B_SYLLABARY + = new UnicodeBlock(0x10000, 0x1007F, + "LINEAR_B_SYLLABARY", + "Linear B Syllabary"); + + /** + * Linear B Ideograms. + * 0x10080 - 0x100FF. + * @since 1.5 + */ + public static final UnicodeBlock LINEAR_B_IDEOGRAMS + = new UnicodeBlock(0x10080, 0x100FF, + "LINEAR_B_IDEOGRAMS", + "Linear B Ideograms"); + + /** + * Aegean Numbers. + * 0x10100 - 0x1013F. + * @since 1.5 + */ + public static final UnicodeBlock AEGEAN_NUMBERS + = new UnicodeBlock(0x10100, 0x1013F, + "AEGEAN_NUMBERS", + "Aegean Numbers"); + + /** + * Old Italic. + * 0x10300 - 0x1032F. + * @since 1.5 + */ + public static final UnicodeBlock OLD_ITALIC + = new UnicodeBlock(0x10300, 0x1032F, + "OLD_ITALIC", + "Old Italic"); + + /** + * Gothic. + * 0x10330 - 0x1034F. + * @since 1.5 + */ + public static final UnicodeBlock GOTHIC + = new UnicodeBlock(0x10330, 0x1034F, + "GOTHIC", + "Gothic"); + + /** + * Ugaritic. + * 0x10380 - 0x1039F. + * @since 1.5 + */ + public static final UnicodeBlock UGARITIC + = new UnicodeBlock(0x10380, 0x1039F, + "UGARITIC", + "Ugaritic"); + + /** + * Deseret. + * 0x10400 - 0x1044F. + * @since 1.5 + */ + public static final UnicodeBlock DESERET + = new UnicodeBlock(0x10400, 0x1044F, + "DESERET", + "Deseret"); + + /** + * Shavian. + * 0x10450 - 0x1047F. + * @since 1.5 + */ + public static final UnicodeBlock SHAVIAN + = new UnicodeBlock(0x10450, 0x1047F, + "SHAVIAN", + "Shavian"); + + /** + * Osmanya. + * 0x10480 - 0x104AF. + * @since 1.5 + */ + public static final UnicodeBlock OSMANYA + = new UnicodeBlock(0x10480, 0x104AF, + "OSMANYA", + "Osmanya"); + + /** + * Cypriot Syllabary. + * 0x10800 - 0x1083F. + * @since 1.5 + */ + public static final UnicodeBlock CYPRIOT_SYLLABARY + = new UnicodeBlock(0x10800, 0x1083F, + "CYPRIOT_SYLLABARY", + "Cypriot Syllabary"); + + /** + * Byzantine Musical Symbols. + * 0x1D000 - 0x1D0FF. + * @since 1.5 + */ + public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS + = new UnicodeBlock(0x1D000, 0x1D0FF, + "BYZANTINE_MUSICAL_SYMBOLS", + "Byzantine Musical Symbols"); + + /** + * Musical Symbols. + * 0x1D100 - 0x1D1FF. + * @since 1.5 + */ + public static final UnicodeBlock MUSICAL_SYMBOLS + = new UnicodeBlock(0x1D100, 0x1D1FF, + "MUSICAL_SYMBOLS", + "Musical Symbols"); + + /** + * Tai Xuan Jing Symbols. + * 0x1D300 - 0x1D35F. + * @since 1.5 + */ + public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS + = new UnicodeBlock(0x1D300, 0x1D35F, + "TAI_XUAN_JING_SYMBOLS", + "Tai Xuan Jing Symbols"); + + /** + * Mathematical Alphanumeric Symbols. + * 0x1D400 - 0x1D7FF. + * @since 1.5 + */ + public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS + = new UnicodeBlock(0x1D400, 0x1D7FF, + "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", + "Mathematical Alphanumeric Symbols"); + + /** + * CJK Unified Ideographs Extension B. + * 0x20000 - 0x2A6DF. + * @since 1.5 + */ + public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B + = new UnicodeBlock(0x20000, 0x2A6DF, + "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", + "CJK Unified Ideographs Extension B"); + + /** + * CJK Compatibility Ideographs Supplement. + * 0x2F800 - 0x2FA1F. + * @since 1.5 + */ + public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT + = new UnicodeBlock(0x2F800, 0x2FA1F, + "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", + "CJK Compatibility Ideographs Supplement"); + + /** + * Tags. + * 0xE0000 - 0xE007F. + * @since 1.5 + */ + public static final UnicodeBlock TAGS + = new UnicodeBlock(0xE0000, 0xE007F, + "TAGS", + "Tags"); + + /** + * Variation Selectors Supplement. + * 0xE0100 - 0xE01EF. + * @since 1.5 + */ + public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT + = new UnicodeBlock(0xE0100, 0xE01EF, + "VARIATION_SELECTORS_SUPPLEMENT", + "Variation Selectors Supplement"); + + /** + * Supplementary Private Use Area-A. + * 0xF0000 - 0xFFFFF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A + = new UnicodeBlock(0xF0000, 0xFFFFF, + "SUPPLEMENTARY_PRIVATE_USE_AREA_A", + "Supplementary Private Use Area-A"); + + /** + * Supplementary Private Use Area-B. + * 0x100000 - 0x10FFFF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B + = new UnicodeBlock(0x100000, 0x10FFFF, + "SUPPLEMENTARY_PRIVATE_USE_AREA_B", + "Supplementary Private Use Area-B"); + + /** + * Surrogates Area. + * 'D800' - 'DFFF'. + * @deprecated As of 1.5, the three areas, + * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>, + * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a> + * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined + * by the Unicode standard, should be used in preference to + * this. These are also returned from calls to <code>of(int)</code> + * and <code>of(char)</code>. + */ + public static final UnicodeBlock SURROGATES_AREA + = new UnicodeBlock(0xD800, 0xDFFF, + "SURROGATES_AREA", + "Surrogates Area"); /** * The defined subsets. @@ -903,6 +1531,7 @@ public final class Character implements Serializable, Comparable COMBINING_DIACRITICAL_MARKS, GREEK, CYRILLIC, + CYRILLIC_SUPPLEMENTARY, ARMENIAN, HEBREW, ARABIC, @@ -929,8 +1558,16 @@ public final class Character implements Serializable, Comparable UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM, RUNIC, + TAGALOG, + HANUNOO, + BUHID, + TAGBANWA, KHMER, MONGOLIAN, + LIMBU, + TAI_LE, + KHMER_SYMBOLS, + PHONETIC_EXTENSIONS, LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED, GENERAL_PUNCTUATION, @@ -950,7 +1587,13 @@ public final class Character implements Serializable, Comparable GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS, DINGBATS, + MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, + SUPPLEMENTAL_ARROWS_A, BRAILLE_PATTERNS, + SUPPLEMENTAL_ARROWS_B, + MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, + SUPPLEMENTAL_MATHEMATICAL_OPERATORS, + MISCELLANEOUS_SYMBOLS_AND_ARROWS, CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS, IDEOGRAPHIC_DESCRIPTION_CHARACTERS, @@ -961,28 +1604,337 @@ public final class Character implements Serializable, Comparable HANGUL_COMPATIBILITY_JAMO, KANBUN, BOPOMOFO_EXTENDED, + KATAKANA_PHONETIC_EXTENSIONS, ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, + YIJING_HEXAGRAM_SYMBOLS, CJK_UNIFIED_IDEOGRAPHS, YI_SYLLABLES, YI_RADICALS, HANGUL_SYLLABLES, - SURROGATES_AREA, + HIGH_SURROGATES, + HIGH_PRIVATE_USE_SURROGATES, + LOW_SURROGATES, PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS, ALPHABETIC_PRESENTATION_FORMS, ARABIC_PRESENTATION_FORMS_A, + VARIATION_SELECTORS, COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS, SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B, HALFWIDTH_AND_FULLWIDTH_FORMS, SPECIALS, + LINEAR_B_SYLLABARY, + LINEAR_B_IDEOGRAMS, + AEGEAN_NUMBERS, + OLD_ITALIC, + GOTHIC, + UGARITIC, + DESERET, + SHAVIAN, + OSMANYA, + CYPRIOT_SYLLABARY, + BYZANTINE_MUSICAL_SYMBOLS, + MUSICAL_SYMBOLS, + TAI_XUAN_JING_SYMBOLS, + MATHEMATICAL_ALPHANUMERIC_SYMBOLS, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, + TAGS, + VARIATION_SELECTORS_SUPPLEMENT, + SUPPLEMENTARY_PRIVATE_USE_AREA_A, + SUPPLEMENTARY_PRIVATE_USE_AREA_B, }; } // class UnicodeBlock /** + * A class to encompass all the properties of characters in the + * private use blocks in the Unicode standard. This class extends + * UnassignedCharacters because the return type from getType() is + * different. + * @author Anthony Balkissoon abalkiss at redhat dot com + * + */ + private static class PrivateUseCharacters extends UnassignedCharacters + { + /** + * Returns the type of the character cp. + */ + static int getType(int cp) + { + // The upper 2 code points in any plane are considered unassigned, + // even in the private-use planes. + if ((cp & 0xffff) >= 0xfffe) + return UnassignedCharacters.getType(cp); + return PRIVATE_USE; + } + + /** + * Returns true if the character cp is defined. + */ + static boolean isDefined(int cp) + { + // The upper 2 code points in any plane are considered unassigned, + // even in the private-use planes. + if ((cp & 0xffff) >= 0xfffe) + return UnassignedCharacters.isDefined(cp); + return true; + } + + /** + * Gets the directionality for the character cp. + */ + static byte getDirectionality(int cp) + { + if ((cp & 0xffff) >= 0xfffe) + return UnassignedCharacters.getDirectionality(cp); + return DIRECTIONALITY_LEFT_TO_RIGHT; + } + } + + /** + * A class to encompass all the properties of code points that are + * currently undefined in the Unicode standard. + * @author Anthony Balkissoon abalkiss at redhat dot com + * + */ + private static class UnassignedCharacters + { + /** + * Returns the numeric value for the unassigned characters. + * @param cp the character + * @param radix the radix (not used) + * @return the numeric value of this character in this radix + */ + static int digit(int cp, int radix) + { + return -1; + } + + /** + * Returns the Unicode directionality property for unassigned + * characters. + * @param cp the character + * @return DIRECTIONALITY_UNDEFINED + */ + static byte getDirectionality(int cp) + { + return DIRECTIONALITY_UNDEFINED; + } + + /** + * Returns -1, the numeric value for unassigned Unicode characters. + * @param cp the character + * @return -1 + */ + static int getNumericValue(int cp) + { + return -1; + } + + /** + * Returns UNASSIGNED, the type of unassigned Unicode characters. + * @param cp the character + * @return UNASSIGNED + */ + static int getType(int cp) + { + return UNASSIGNED; + } + + /** + * Returns false to indiciate that the character is not defined in the + * Unicode standard. + * @param cp the character + * @return false + */ + static boolean isDefined(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character is not a digit. + * @param cp the character + * @return false + */ + static boolean isDigit(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character cannot be ignored + * within an identifier + * @param cp the character + * @return false + */ + static boolean isIdentifierIgnorable(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character cannot be part of a + * Java identifier. + * @param cp the character + * @return false + */ + static boolean isJavaIdentifierPart(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character cannot be start a + * Java identifier. + * @param cp the character + * @return false + */ + static boolean isJavaIdentiferStart(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character is not a letter. + * @param cp the character + * @return false + */ + static boolean isLetter(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character cannot is neither a letter + * nor a digit. + * @param cp the character + * @return false + */ + static boolean isLetterOrDigit(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character is not a lowercase letter. + * @param cp the character + * @return false + */ + static boolean isLowerCase(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character cannot is not mirrored. + * @param cp the character + * @return false + */ + static boolean isMirrored(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character is not a space character. + * @param cp the character + * @return false + */ + static boolean isSpaceChar(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character it not a titlecase letter. + * @param cp the character + * @return false + */ + static boolean isTitleCase(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character cannot be part of a + * Unicode identifier. + * @param cp the character + * @return false + */ + static boolean isUnicodeIdentifierPart(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character cannot start a + * Unicode identifier. + * @param cp the character + * @return false + */ + static boolean isUnicodeIdentifierStart(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character is not an uppercase letter. + * @param cp the character + * @return false + */ + static boolean isUpperCase(int cp) + { + return false; + } + + /** + * Returns false to indicate that the character is not a whitespace + * character. + * @param cp the character + * @return false + */ + static boolean isWhiteSpace(int cp) + { + return false; + } + + /** + * Returns cp to indicate this character has no lowercase conversion. + * @param cp the character + * @return cp + */ + static int toLowerCase(int cp) + { + return cp; + } + + /** + * Returns cp to indicate this character has no titlecase conversion. + * @param cp the character + * @return cp + */ + static int toTitleCase(int cp) + { + return cp; + } + + /** + * Returns cp to indicate this character has no uppercase conversion. + * @param cp the character + * @return cp + */ + static int toUpperCase(int cp) + { + return cp; + } + } + + /** * The immutable value of this Character. * * @serial the value of this Character @@ -1399,39 +2351,128 @@ public final class Character implements Serializable, Comparable /** * Stores unicode block offset lookup table. Exploit package visibility of * String.value to avoid copying the array. - * @see #readChar(char) + * @see #readCodePoint(int) * @see CharData#BLOCKS */ - private static final char[] blocks = String.zeroBasedStringValue(CharData.BLOCKS); + private static final char[][] blocks = + new char[][]{ + String.zeroBasedStringValue(CharData.BLOCKS[0]), + String.zeroBasedStringValue(CharData.BLOCKS[1]), + String.zeroBasedStringValue(CharData.BLOCKS[2]), + String.zeroBasedStringValue(CharData.BLOCKS[3]), + String.zeroBasedStringValue(CharData.BLOCKS[4]), + String.zeroBasedStringValue(CharData.BLOCKS[5]), + String.zeroBasedStringValue(CharData.BLOCKS[6]), + String.zeroBasedStringValue(CharData.BLOCKS[7]), + String.zeroBasedStringValue(CharData.BLOCKS[8]), + String.zeroBasedStringValue(CharData.BLOCKS[9]), + String.zeroBasedStringValue(CharData.BLOCKS[10]), + String.zeroBasedStringValue(CharData.BLOCKS[11]), + String.zeroBasedStringValue(CharData.BLOCKS[12]), + String.zeroBasedStringValue(CharData.BLOCKS[13]), + String.zeroBasedStringValue(CharData.BLOCKS[14]), + String.zeroBasedStringValue(CharData.BLOCKS[15]), + String.zeroBasedStringValue(CharData.BLOCKS[16])}; /** * Stores unicode attribute offset lookup table. Exploit package visibility * of String.value to avoid copying the array. * @see CharData#DATA - */ - private static final char[] data = String.zeroBasedStringValue(CharData.DATA); + */ + private static final char[][] data = + new char[][]{ + String.zeroBasedStringValue(CharData.DATA[0]), + String.zeroBasedStringValue(CharData.DATA[1]), + String.zeroBasedStringValue(CharData.DATA[2]), + String.zeroBasedStringValue(CharData.DATA[3]), + String.zeroBasedStringValue(CharData.DATA[4]), + String.zeroBasedStringValue(CharData.DATA[5]), + String.zeroBasedStringValue(CharData.DATA[6]), + String.zeroBasedStringValue(CharData.DATA[7]), + String.zeroBasedStringValue(CharData.DATA[8]), + String.zeroBasedStringValue(CharData.DATA[9]), + String.zeroBasedStringValue(CharData.DATA[10]), + String.zeroBasedStringValue(CharData.DATA[11]), + String.zeroBasedStringValue(CharData.DATA[12]), + String.zeroBasedStringValue(CharData.DATA[13]), + String.zeroBasedStringValue(CharData.DATA[14]), + String.zeroBasedStringValue(CharData.DATA[15]), + String.zeroBasedStringValue(CharData.DATA[16])}; /** * Stores unicode numeric value attribute table. Exploit package visibility * of String.value to avoid copying the array. * @see CharData#NUM_VALUE */ - private static final char[] numValue - = String.zeroBasedStringValue(CharData.NUM_VALUE); + private static final char[][] numValue = + new char[][]{ + String.zeroBasedStringValue(CharData.NUM_VALUE[0]), + String.zeroBasedStringValue(CharData.NUM_VALUE[1]), + String.zeroBasedStringValue(CharData.NUM_VALUE[2]), + String.zeroBasedStringValue(CharData.NUM_VALUE[3]), + String.zeroBasedStringValue(CharData.NUM_VALUE[4]), + String.zeroBasedStringValue(CharData.NUM_VALUE[5]), + String.zeroBasedStringValue(CharData.NUM_VALUE[6]), + String.zeroBasedStringValue(CharData.NUM_VALUE[7]), + String.zeroBasedStringValue(CharData.NUM_VALUE[8]), + String.zeroBasedStringValue(CharData.NUM_VALUE[9]), + String.zeroBasedStringValue(CharData.NUM_VALUE[10]), + String.zeroBasedStringValue(CharData.NUM_VALUE[11]), + String.zeroBasedStringValue(CharData.NUM_VALUE[12]), + String.zeroBasedStringValue(CharData.NUM_VALUE[13]), + String.zeroBasedStringValue(CharData.NUM_VALUE[14]), + String.zeroBasedStringValue(CharData.NUM_VALUE[15]), + String.zeroBasedStringValue(CharData.NUM_VALUE[16])}; /** * Stores unicode uppercase attribute table. Exploit package visibility * of String.value to avoid copying the array. * @see CharData#UPPER - */ - private static final char[] upper = String.zeroBasedStringValue(CharData.UPPER); + */ + private static final char[][] upper = + new char[][]{ + String.zeroBasedStringValue(CharData.UPPER[0]), + String.zeroBasedStringValue(CharData.UPPER[1]), + String.zeroBasedStringValue(CharData.UPPER[2]), + String.zeroBasedStringValue(CharData.UPPER[3]), + String.zeroBasedStringValue(CharData.UPPER[4]), + String.zeroBasedStringValue(CharData.UPPER[5]), + String.zeroBasedStringValue(CharData.UPPER[6]), + String.zeroBasedStringValue(CharData.UPPER[7]), + String.zeroBasedStringValue(CharData.UPPER[8]), + String.zeroBasedStringValue(CharData.UPPER[9]), + String.zeroBasedStringValue(CharData.UPPER[10]), + String.zeroBasedStringValue(CharData.UPPER[11]), + String.zeroBasedStringValue(CharData.UPPER[12]), + String.zeroBasedStringValue(CharData.UPPER[13]), + String.zeroBasedStringValue(CharData.UPPER[14]), + String.zeroBasedStringValue(CharData.UPPER[15]), + String.zeroBasedStringValue(CharData.UPPER[16])}; /** * Stores unicode lowercase attribute table. Exploit package visibility * of String.value to avoid copying the array. * @see CharData#LOWER */ - private static final char[] lower = String.zeroBasedStringValue(CharData.LOWER); + private static final char[][] lower = + new char[][]{ + String.zeroBasedStringValue(CharData.LOWER[0]), + String.zeroBasedStringValue(CharData.LOWER[1]), + String.zeroBasedStringValue(CharData.LOWER[2]), + String.zeroBasedStringValue(CharData.LOWER[3]), + String.zeroBasedStringValue(CharData.LOWER[4]), + String.zeroBasedStringValue(CharData.LOWER[5]), + String.zeroBasedStringValue(CharData.LOWER[6]), + String.zeroBasedStringValue(CharData.LOWER[7]), + String.zeroBasedStringValue(CharData.LOWER[8]), + String.zeroBasedStringValue(CharData.LOWER[9]), + String.zeroBasedStringValue(CharData.LOWER[10]), + String.zeroBasedStringValue(CharData.LOWER[11]), + String.zeroBasedStringValue(CharData.LOWER[12]), + String.zeroBasedStringValue(CharData.LOWER[13]), + String.zeroBasedStringValue(CharData.LOWER[14]), + String.zeroBasedStringValue(CharData.LOWER[15]), + String.zeroBasedStringValue(CharData.LOWER[16])}; /** * Stores unicode direction attribute table. Exploit package visibility @@ -1439,14 +2480,32 @@ public final class Character implements Serializable, Comparable * @see CharData#DIRECTION */ // Package visible for use by String. - static final char[] direction = String.zeroBasedStringValue(CharData.DIRECTION); + static final char[][] direction = + new char[][]{ + String.zeroBasedStringValue(CharData.DIRECTION[0]), + String.zeroBasedStringValue(CharData.DIRECTION[1]), + String.zeroBasedStringValue(CharData.DIRECTION[2]), + String.zeroBasedStringValue(CharData.DIRECTION[3]), + String.zeroBasedStringValue(CharData.DIRECTION[4]), + String.zeroBasedStringValue(CharData.DIRECTION[5]), + String.zeroBasedStringValue(CharData.DIRECTION[6]), + String.zeroBasedStringValue(CharData.DIRECTION[7]), + String.zeroBasedStringValue(CharData.DIRECTION[8]), + String.zeroBasedStringValue(CharData.DIRECTION[9]), + String.zeroBasedStringValue(CharData.DIRECTION[10]), + String.zeroBasedStringValue(CharData.DIRECTION[11]), + String.zeroBasedStringValue(CharData.DIRECTION[12]), + String.zeroBasedStringValue(CharData.DIRECTION[13]), + String.zeroBasedStringValue(CharData.DIRECTION[14]), + String.zeroBasedStringValue(CharData.DIRECTION[15]), + String.zeroBasedStringValue(CharData.DIRECTION[16])}; /** * Stores unicode titlecase table. Exploit package visibility of * String.value to avoid copying the array. * @see CharData#TITLE */ - private static final char[] title = String.zeroBasedStringValue(CharData.TITLE); + private static final char[] title = String.zeroBasedStringValue(CharData.TITLE); /** * Mask for grabbing the type out of the contents of data. @@ -1538,7 +2597,7 @@ public final class Character implements Serializable, Comparable * 5 bits are the character type, the next 2 bits are flags, and the top * 9 bits are the offset into the attribute tables. * - * @param ch the character to look up + * @param codePoint the character to look up * @return the character's attribute offset and type * @see #TYPE_MASK * @see #NO_BREAK_MASK @@ -1546,11 +2605,11 @@ public final class Character implements Serializable, Comparable * @see CharData#DATA * @see CharData#SHIFT */ - // Package visible for use in String. - static char readChar(char ch) + static char readCodePoint(int codePoint) { - // Perform 16-bit addition to find the correct entry in data. - return data[(char) (blocks[ch >> CharData.SHIFT] + ch)]; + int plane = codePoint >>> 16; + char offset = (char) (codePoint & 0xffff); + return data[plane][(char) (blocks[plane][offset >> CharData.SHIFT[plane]] + offset)]; } /** @@ -1623,7 +2682,8 @@ public final class Character implements Serializable, Comparable /** * Determines if a character is a Unicode lowercase letter. For example, - * <code>'a'</code> is lowercase. + * <code>'a'</code> is lowercase. Returns true if getType() returns + * LOWERCASE_LETTER. * <br> * lowercase = [Ll] * @@ -1636,12 +2696,34 @@ public final class Character implements Serializable, Comparable */ public static boolean isLowerCase(char ch) { - return getType(ch) == LOWERCASE_LETTER; + return isLowerCase((int)ch); + } + + /** + * Determines if a character is a Unicode lowercase letter. For example, + * <code>'a'</code> is lowercase. Returns true if getType() returns + * LOWERCASE_LETTER. + * <br> + * lowercase = [Ll] + * + * @param codePoint character to test + * @return true if ch is a Unicode lowercase letter, else false + * @see #isUpperCase(char) + * @see #isTitleCase(char) + * @see #toLowerCase(char) + * @see #getType(char) + * + * @since 1.5 + */ + public static boolean isLowerCase(int codePoint) + { + return getType(codePoint) == LOWERCASE_LETTER; } /** * Determines if a character is a Unicode uppercase letter. For example, - * <code>'A'</code> is uppercase. + * <code>'A'</code> is uppercase. Returns true if getType() returns + * UPPERCASE_LETTER. * <br> * uppercase = [Lu] * @@ -1654,12 +2736,34 @@ public final class Character implements Serializable, Comparable */ public static boolean isUpperCase(char ch) { - return getType(ch) == UPPERCASE_LETTER; + return isUpperCase((int)ch); + } + + /** + * Determines if a character is a Unicode uppercase letter. For example, + * <code>'A'</code> is uppercase. Returns true if getType() returns + * UPPERCASE_LETTER. + * <br> + * uppercase = [Lu] + * + * @param codePoint character to test + * @return true if ch is a Unicode uppercase letter, else false + * @see #isLowerCase(char) + * @see #isTitleCase(char) + * @see #toUpperCase(char) + * @see #getType(char) + * + * @since 1.5 + */ + public static boolean isUpperCase(int codePoint) + { + return getType(codePoint) == UPPERCASE_LETTER; } /** * Determines if a character is a Unicode titlecase letter. For example, * the character "Lj" (Latin capital L with small letter j) is titlecase. + * True if getType() returns TITLECASE_LETTER. * <br> * titlecase = [Lt] * @@ -1672,12 +2776,35 @@ public final class Character implements Serializable, Comparable */ public static boolean isTitleCase(char ch) { - return getType(ch) == TITLECASE_LETTER; + return isTitleCase((int)ch); } + + /** + * Determines if a character is a Unicode titlecase letter. For example, + * the character "Lj" (Latin capital L with small letter j) is titlecase. + * True if getType() returns TITLECASE_LETTER. + * <br> + * titlecase = [Lt] + * + * @param codePoint character to test + * @return true if ch is a Unicode titlecase letter, else false + * @see #isLowerCase(char) + * @see #isUpperCase(char) + * @see #toTitleCase(char) + * @see #getType(char) + * + * @since 1.5 + */ + public static boolean isTitleCase(int codePoint) + { + return getType(codePoint) == TITLECASE_LETTER; + } + /** * Determines if a character is a Unicode decimal digit. For example, - * <code>'0'</code> is a digit. + * <code>'0'</code> is a digit. A character is a Unicode digit if + * getType() returns DECIMAL_DIGIT_NUMBER. * <br> * Unicode decimal digit = [Nd] * @@ -1689,7 +2816,28 @@ public final class Character implements Serializable, Comparable */ public static boolean isDigit(char ch) { - return getType(ch) == DECIMAL_DIGIT_NUMBER; + return isDigit((int)ch); + } + + /** + * Determines if a character is a Unicode decimal digit. For example, + * <code>'0'</code> is a digit. A character is a Unicode digit if + * getType() returns DECIMAL_DIGIT_NUMBER. + * <br> + * Unicode decimal digit = [Nd] + * + * @param codePoint character to test + * @return true if ch is a Unicode decimal digit, else false + * @see #digit(char, int) + * @see #forDigit(int, int) + * @see #getType(char) + * + * @since 1.5 + */ + + public static boolean isDigit(int codePoint) + { + return getType(codePoint) == DECIMAL_DIGIT_NUMBER; } /** @@ -1709,12 +2857,37 @@ public final class Character implements Serializable, Comparable */ public static boolean isDefined(char ch) { - return getType(ch) != UNASSIGNED; + return isDefined((int)ch); + } + + /** + * Determines if a character is part of the Unicode Standard. This is an + * evolving standard, but covers every character in the data file. + * <br> + * defined = not [Cn] + * + * @param codePoint character to test + * @return true if ch is a Unicode character, else false + * @see #isDigit(char) + * @see #isLetter(char) + * @see #isLetterOrDigit(char) + * @see #isLowerCase(char) + * @see #isTitleCase(char) + * @see #isUpperCase(char) + * + * @since 1.5 + */ + public static boolean isDefined(int codePoint) + { + return getType(codePoint) != UNASSIGNED; } /** * Determines if a character is a Unicode letter. Not all letters have case, * so this may return true when isLowerCase and isUpperCase return false. + * A character is a Unicode letter if getType() returns one of + * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, + * or OTHER_LETTER. * <br> * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo] * @@ -1732,12 +2905,242 @@ public final class Character implements Serializable, Comparable */ public static boolean isLetter(char ch) { - return ((1 << getType(ch)) - & ((1 << UPPERCASE_LETTER) - | (1 << LOWERCASE_LETTER) - | (1 << TITLECASE_LETTER) - | (1 << MODIFIER_LETTER) - | (1 << OTHER_LETTER))) != 0; + return isLetter((int)ch); + } + + /** + * Determines if a character is a Unicode letter. Not all letters have case, + * so this may return true when isLowerCase and isUpperCase return false. + * A character is a Unicode letter if getType() returns one of + * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER, + * or OTHER_LETTER. + * <br> + * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo] + * + * @param codePoint character to test + * @return true if ch is a Unicode letter, else false + * @see #isDigit(char) + * @see #isJavaIdentifierStart(char) + * @see #isJavaLetter(char) + * @see #isJavaLetterOrDigit(char) + * @see #isLetterOrDigit(char) + * @see #isLowerCase(char) + * @see #isTitleCase(char) + * @see #isUnicodeIdentifierStart(char) + * @see #isUpperCase(char) + * + * @since 1.5 + */ + public static boolean isLetter(int codePoint) + { + return ((1 << getType(codePoint)) + & ((1 << UPPERCASE_LETTER) + | (1 << LOWERCASE_LETTER) + | (1 << TITLECASE_LETTER) + | (1 << MODIFIER_LETTER) + | (1 << OTHER_LETTER))) != 0; + } + /** + * Returns the index into the given CharSequence that is offset + * <code>codePointOffset</code> code points from <code>index</code>. + * @param seq the CharSequence + * @param index the start position in the CharSequence + * @param codePointOffset the number of code points offset from the start + * position + * @return the index into the CharSequence that is codePointOffset code + * points offset from index + * + * @throws NullPointerException if seq is null + * @throws IndexOutOfBoundsException if index is negative or greater than the + * length of the sequence. + * @throws IndexOutOfBoundsException if codePointOffset is positive and the + * subsequence from index to the end of seq has fewer than codePointOffset + * code points + * @throws IndexOutOfBoundsException if codePointOffset is negative and the + * subsequence from the start of seq to index has fewer than + * (-codePointOffset) code points + * @since 1.5 + */ + public static int offsetByCodePoints(CharSequence seq, + int index, + int codePointOffset) + { + int len = seq.length(); + if (index < 0 || index > len) + throw new IndexOutOfBoundsException(); + + int numToGo = codePointOffset; + int offset = index; + int adjust = 1; + if (numToGo >= 0) + { + for (; numToGo > 0; offset++) + { + numToGo--; + if (Character.isHighSurrogate(seq.charAt(offset)) + && (offset + 1) < len + && Character.isLowSurrogate(seq.charAt(offset + 1))) + offset++; + } + return offset; + } + else + { + numToGo *= -1; + for (; numToGo > 0;) + { + numToGo--; + offset--; + if (Character.isLowSurrogate(seq.charAt(offset)) + && (offset - 1) >= 0 + && Character.isHighSurrogate(seq.charAt(offset - 1))) + offset--; + } + return offset; + } + } + + /** + * Returns the index into the given char subarray that is offset + * <code>codePointOffset</code> code points from <code>index</code>. + * @param a the char array + * @param start the start index of the subarray + * @param count the length of the subarray + * @param index the index to be offset + * @param codePointOffset the number of code points offset from <code>index + * </code> + * @return the index into the char array + * + * @throws NullPointerException if a is null + * @throws IndexOutOfBoundsException if start or count is negative or if + * start + count is greater than the length of the array + * @throws IndexOutOfBoundsException if index is less than start or larger + * than start + count + * @throws IndexOutOfBoundsException if codePointOffset is positive and the + * subarray from index to start + count - 1 has fewer than codePointOffset + * code points. + * @throws IndexOutOfBoundsException if codePointOffset is negative and the + * subarray from start to index - 1 has fewer than (-codePointOffset) code + * points + * + * @since 1.5 + */ + public static int offsetByCodePoints(char[] a, + int start, + int count, + int index, + int codePointOffset) + { + int len = a.length; + int end = start + count; + if (start < 0 || count < 0 || end > len || index < start || index > end) + throw new IndexOutOfBoundsException(); + + int numToGo = codePointOffset; + int offset = index; + int adjust = 1; + if (numToGo >= 0) + { + for (; numToGo > 0; offset++) + { + numToGo--; + if (Character.isHighSurrogate(a[offset]) + && (offset + 1) < len + && Character.isLowSurrogate(a[offset + 1])) + offset++; + } + return offset; + } + else + { + numToGo *= -1; + for (; numToGo > 0;) + { + numToGo--; + offset--; + if (Character.isLowSurrogate(a[offset]) + && (offset - 1) >= 0 + && Character.isHighSurrogate(a[offset - 1])) + offset--; + if (offset < start) + throw new IndexOutOfBoundsException(); + } + return offset; + } + + } + + /** + * Returns the number of Unicode code points in the specified range of the + * given CharSequence. The first char in the range is at position + * beginIndex and the last one is at position endIndex - 1. Paired + * surrogates (supplementary characters are represented by a pair of chars - + * one from the high surrogates and one from the low surrogates) + * count as just one code point. + * @param seq the CharSequence to inspect + * @param beginIndex the beginning of the range + * @param endIndex the end of the range + * @return the number of Unicode code points in the given range of the + * sequence + * @throws NullPointerException if seq is null + * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is + * larger than the length of seq, or if beginIndex is greater than endIndex. + * @since 1.5 + */ + public static int codePointCount(CharSequence seq, int beginIndex, + int endIndex) + { + int len = seq.length(); + if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) + throw new IndexOutOfBoundsException(); + + int count = 0; + for (int i = beginIndex; i < endIndex; i++) + { + count++; + // If there is a pairing, count it only once. + if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex + && isLowSurrogate(seq.charAt(i + 1))) + i ++; + } + return count; + } + + /** + * Returns the number of Unicode code points in the specified range of the + * given char array. The first char in the range is at position + * offset and the length of the range is count. Paired surrogates + * (supplementary characters are represented by a pair of chars - + * one from the high surrogates and one from the low surrogates) + * count as just one code point. + * @param a the char array to inspect + * @param offset the beginning of the range + * @param count the length of the range + * @return the number of Unicode code points in the given range of the + * array + * @throws NullPointerException if a is null + * @throws IndexOutOfBoundsException if offset or count is negative or if + * offset + countendIndex is larger than the length of a. + * @since 1.5 + */ + public static int codePointCount(char[] a, int offset, + int count) + { + int len = a.length; + int end = offset + count; + if (offset < 0 || count < 0 || end > len) + throw new IndexOutOfBoundsException(); + + int counter = 0; + for (int i = offset; i < end; i++) + { + counter++; + // If there is a pairing, count it only once. + if (isHighSurrogate(a[i]) && (i + 1) < end + && isLowSurrogate(a[i + 1])) + i ++; + } + return counter; } /** @@ -1757,16 +3160,38 @@ public final class Character implements Serializable, Comparable */ public static boolean isLetterOrDigit(char ch) { - return ((1 << getType(ch)) - & ((1 << UPPERCASE_LETTER) - | (1 << LOWERCASE_LETTER) - | (1 << TITLECASE_LETTER) - | (1 << MODIFIER_LETTER) - | (1 << OTHER_LETTER) - | (1 << DECIMAL_DIGIT_NUMBER))) != 0; + return isLetterOrDigit((int)ch); } /** + * Determines if a character is a Unicode letter or a Unicode digit. This + * is the combination of isLetter and isDigit. + * <br> + * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd] + * + * @param codePoint character to test + * @return true if ch is a Unicode letter or a Unicode digit, else false + * @see #isDigit(char) + * @see #isJavaIdentifierPart(char) + * @see #isJavaLetter(char) + * @see #isJavaLetterOrDigit(char) + * @see #isLetter(char) + * @see #isUnicodeIdentifierPart(char) + * + * @since 1.5 + */ + public static boolean isLetterOrDigit(int codePoint) + { + return ((1 << getType(codePoint)) + & ((1 << UPPERCASE_LETTER) + | (1 << LOWERCASE_LETTER) + | (1 << TITLECASE_LETTER) + | (1 << MODIFIER_LETTER) + | (1 << OTHER_LETTER) + | (1 << DECIMAL_DIGIT_NUMBER))) != 0; + } + + /** * Determines if a character can start a Java identifier. This is the * combination of isLetter, any character where getType returns * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation @@ -1827,7 +3252,27 @@ public final class Character implements Serializable, Comparable */ public static boolean isJavaIdentifierStart(char ch) { - return ((1 << getType(ch)) + return isJavaIdentifierStart((int)ch); + } + + /** + * Determines if a character can start a Java identifier. This is the + * combination of isLetter, any character where getType returns + * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation + * (like '_'). + * <br> + * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc] + * + * @param codePoint character to test + * @return true if ch can start a Java identifier, else false + * @see #isJavaIdentifierPart(char) + * @see #isLetter(char) + * @see #isUnicodeIdentifierStart(char) + * @since 1.5 + */ + public static boolean isJavaIdentifierStart(int codePoint) + { + return ((1 << getType(codePoint)) & ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) | (1 << TITLECASE_LETTER) @@ -1837,7 +3282,7 @@ public final class Character implements Serializable, Comparable | (1 << CURRENCY_SYMBOL) | (1 << CONNECTOR_PUNCTUATION))) != 0; } - + /** * Determines if a character can follow the first letter in * a Java identifier. This is the combination of isJavaLetter (isLetter, @@ -1859,7 +3304,31 @@ public final class Character implements Serializable, Comparable */ public static boolean isJavaIdentifierPart(char ch) { - int category = getType(ch); + return isJavaIdentifierPart((int)ch); + } + + /** + * Determines if a character can follow the first letter in + * a Java identifier. This is the combination of isJavaLetter (isLetter, + * type of LETTER_NUMBER, currency, connecting punctuation) and digit, + * numeric letter (like Roman numerals), combining marks, non-spacing marks, + * or isIdentifierIgnorable. + * <br> + * Java identifier extender = + * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf] + * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F + * + * @param codePoint character to test + * @return true if ch can follow the first letter in a Java identifier + * @see #isIdentifierIgnorable(char) + * @see #isJavaIdentifierStart(char) + * @see #isLetterOrDigit(char) + * @see #isUnicodeIdentifierPart(char) + * @since 1.5 + */ + public static boolean isJavaIdentifierPart(int codePoint) + { + int category = getType(codePoint); return ((1 << category) & ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) @@ -1873,7 +3342,7 @@ public final class Character implements Serializable, Comparable | (1 << CURRENCY_SYMBOL) | (1 << CONNECTOR_PUNCTUATION) | (1 << FORMAT))) != 0 - || (category == CONTROL && isIdentifierIgnorable(ch)); + || (category == CONTROL && isIdentifierIgnorable(codePoint)); } /** @@ -1892,7 +3361,26 @@ public final class Character implements Serializable, Comparable */ public static boolean isUnicodeIdentifierStart(char ch) { - return ((1 << getType(ch)) + return isUnicodeIdentifierStart((int)ch); + } + + /** + * Determines if a character can start a Unicode identifier. Only + * letters can start a Unicode identifier, but this includes characters + * in LETTER_NUMBER. + * <br> + * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl] + * + * @param codePoint character to test + * @return true if ch can start a Unicode identifier, else false + * @see #isJavaIdentifierStart(char) + * @see #isLetter(char) + * @see #isUnicodeIdentifierPart(char) + * @since 1.5 + */ + public static boolean isUnicodeIdentifierStart(int codePoint) + { + return ((1 << getType(codePoint)) & ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) | (1 << TITLECASE_LETTER) @@ -1921,7 +3409,30 @@ public final class Character implements Serializable, Comparable */ public static boolean isUnicodeIdentifierPart(char ch) { - int category = getType(ch); + return isUnicodeIdentifierPart((int)ch); + } + + /** + * Determines if a character can follow the first letter in + * a Unicode identifier. This includes letters, connecting punctuation, + * digits, numeric letters, combining marks, non-spacing marks, and + * isIdentifierIgnorable. + * <br> + * Unicode identifier extender = + * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]| + * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F + * + * @param codePoint character to test + * @return true if ch can follow the first letter in a Unicode identifier + * @see #isIdentifierIgnorable(char) + * @see #isJavaIdentifierPart(char) + * @see #isLetterOrDigit(char) + * @see #isUnicodeIdentifierStart(char) + * @since 1.5 + */ + public static boolean isUnicodeIdentifierPart(int codePoint) + { + int category = getType(codePoint); return ((1 << category) & ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) @@ -1934,7 +3445,7 @@ public final class Character implements Serializable, Comparable | (1 << LETTER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | (1 << FORMAT))) != 0 - || (category == CONTROL && isIdentifierIgnorable(ch)); + || (category == CONTROL && isIdentifierIgnorable(codePoint)); } /** @@ -1955,9 +3466,32 @@ public final class Character implements Serializable, Comparable */ public static boolean isIdentifierIgnorable(char ch) { - return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F' - || (ch <= '\u001B' && ch >= '\u000E'))) - || getType(ch) == FORMAT; + return isIdentifierIgnorable((int)ch); + } + /** + * Determines if a character is ignorable in a Unicode identifier. This + * includes the non-whitespace ISO control characters (<code>'\u0000'</code> + * through <code>'\u0008'</code>, <code>'\u000E'</code> through + * <code>'\u001B'</code>, and <code>'\u007F'</code> through + * <code>'\u009F'</code>), and FORMAT characters. + * <br> + * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B + * |U+007F-U+009F + * + * @param codePoint character to test + * @return true if ch is ignorable in a Unicode or Java identifier + * @see #isJavaIdentifierPart(char) + * @see #isUnicodeIdentifierPart(char) + * @since 1.5 + */ + public static boolean isIdentifierIgnorable(int codePoint) + { + if ((codePoint >= 0 && codePoint <= 0x0008) + || (codePoint >= 0x000E && codePoint <= 0x001B) + || (codePoint >= 0x007F && codePoint <= 0x009F) + || getType(codePoint) == FORMAT) + return true; + return false; } /** @@ -1975,8 +3509,37 @@ public final class Character implements Serializable, Comparable */ public static char toLowerCase(char ch) { - // Signedness doesn't matter, as result is cast back to char. - return (char) (ch + lower[readChar(ch) >> 7]); + return (char) (lower[0][readCodePoint((int)ch) >>> 7] + ch); + } + + /** + * Converts a Unicode character into its lowercase equivalent mapping. + * If a mapping does not exist, then the character passed is returned. + * Note that isLowerCase(toLowerCase(ch)) does not always return true. + * + * @param codePoint character to convert to lowercase + * @return lowercase mapping of ch, or ch if lowercase mapping does + * not exist + * @see #isLowerCase(char) + * @see #isUpperCase(char) + * @see #toTitleCase(char) + * @see #toUpperCase(char) + * + * @since 1.5 + */ + public static int toLowerCase(int codePoint) + { + // If the code point is unassigned or in one of the private use areas + // then we delegate the call to the appropriate private static inner class. + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.toLowerCase(codePoint); + if (plane > 14) + return PrivateUseCharacters.toLowerCase(codePoint); + + // The short value stored in lower[plane] is the signed difference between + // codePoint and its lowercase conversion. + return ((short)lower[plane][readCodePoint(codePoint) >>> 7]) + codePoint; } /** @@ -1994,8 +3557,37 @@ public final class Character implements Serializable, Comparable */ public static char toUpperCase(char ch) { - // Signedness doesn't matter, as result is cast back to char. - return (char) (ch + upper[readChar(ch) >> 7]); + return (char) (upper[0][readCodePoint((int)ch) >>> 7] + ch); + } + + /** + * Converts a Unicode character into its uppercase equivalent mapping. + * If a mapping does not exist, then the character passed is returned. + * Note that isUpperCase(toUpperCase(ch)) does not always return true. + * + * @param codePoint character to convert to uppercase + * @return uppercase mapping of ch, or ch if uppercase mapping does + * not exist + * @see #isLowerCase(char) + * @see #isUpperCase(char) + * @see #toLowerCase(char) + * @see #toTitleCase(char) + * + * @since 1.5 + */ + public static int toUpperCase(int codePoint) + { + // If the code point is unassigned or in one of the private use areas + // then we delegate the call to the appropriate private static inner class. + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.toUpperCase(codePoint); + if (plane > 14) + return PrivateUseCharacters.toUpperCase(codePoint); + + // The short value stored in upper[plane] is the signed difference between + // codePoint and its uppercase conversion. + return ((short)upper[plane][readCodePoint(codePoint) >>> 7]) + codePoint; } /** @@ -2018,6 +3610,30 @@ public final class Character implements Serializable, Comparable return title[i + 1]; return toUpperCase(ch); } + + /** + * Converts a Unicode character into its titlecase equivalent mapping. + * If a mapping does not exist, then the character passed is returned. + * Note that isTitleCase(toTitleCase(ch)) does not always return true. + * + * @param codePoint character to convert to titlecase + * @return titlecase mapping of ch, or ch if titlecase mapping does + * not exist + * @see #isTitleCase(char) + * @see #toLowerCase(char) + * @see #toUpperCase(char) + * + * @since 1.5 + */ + public static int toTitleCase(int codePoint) + { + // As of Unicode 4.0.0 no characters outside of plane 0 have + // titlecase mappings that are different from their uppercase + // mapping. + if (codePoint < 0x10000) + return (int) toTitleCase((char)codePoint); + return toUpperCase(codePoint); + } /** * Converts a character into a digit of the specified radix. If the radix @@ -2041,20 +3657,68 @@ public final class Character implements Serializable, Comparable { if (radix < MIN_RADIX || radix > MAX_RADIX) return -1; - char attr = readChar(ch); + char attr = readCodePoint((int)ch); if (((1 << (attr & TYPE_MASK)) & ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) | (1 << DECIMAL_DIGIT_NUMBER))) != 0) { // Signedness doesn't matter; 0xffff vs. -1 are both rejected. - int digit = numValue[attr >> 7]; + int digit = numValue[0][attr >> 7]; return (digit < radix) ? digit : -1; } return -1; } /** + * Converts a character into a digit of the specified radix. If the radix + * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch) + * exceeds the radix, or if ch is not a decimal digit or in the case + * insensitive set of 'a'-'z', the result is -1. + * <br> + * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A + * |U+FF21-U+FF3A|U+FF41-U+FF5A + * + * @param codePoint character to convert into a digit + * @param radix radix in which ch is a digit + * @return digit which ch represents in radix, or -1 not a valid digit + * @see #MIN_RADIX + * @see #MAX_RADIX + * @see #forDigit(int, int) + * @see #isDigit(char) + * @see #getNumericValue(char) + */ + public static int digit(int codePoint, int radix) + { + if (radix < MIN_RADIX || radix > MAX_RADIX) + return -1; + + // If the code point is unassigned or in one of the private use areas + // then we delegate the call to the appropriate private static inner class. + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.digit(codePoint, radix); + if (plane > 14) + return PrivateUseCharacters.digit(codePoint, radix); + char attr = readCodePoint(codePoint); + if (((1 << (attr & TYPE_MASK)) + & ((1 << UPPERCASE_LETTER) + | (1 << LOWERCASE_LETTER) + | (1 << DECIMAL_DIGIT_NUMBER))) != 0) + { + // Signedness doesn't matter; 0xffff vs. -1 are both rejected. + int digit = numValue[plane][attr >> 7]; + + // If digit is less than or equal to -3 then the numerical value was + // too large to fit into numValue and is stored in CharData.LARGENUMS. + if (digit <= -3) + digit = CharData.LARGENUMS[-digit - 3]; + return (digit < radix) ? digit : -1; + } + return -1; + } + + /** * Returns the Unicode numeric value property of a character. For example, * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50. * @@ -2084,7 +3748,53 @@ public final class Character implements Serializable, Comparable public static int getNumericValue(char ch) { // Treat numValue as signed. - return (short) numValue[readChar(ch) >> 7]; + return (short) numValue[0][readCodePoint((int)ch) >> 7]; + } + + /** + * Returns the Unicode numeric value property of a character. For example, + * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50. + * + * <p>This method also returns values for the letters A through Z, (not + * specified by Unicode), in these ranges: <code>'\u0041'</code> + * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code> + * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code> + * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through + * <code>'\uFF5A'</code> (full width variants). + * + * <p>If the character lacks a numeric value property, -1 is returned. + * If the character has a numeric value property which is not representable + * as a nonnegative integer, such as a fraction, -2 is returned. + * + * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A + * |U+FF21-U+FF3A|U+FF41-U+FF5A + * + * @param codePoint character from which the numeric value property will + * be retrieved + * @return the numeric value property of ch, or -1 if it does not exist, or + * -2 if it is not representable as a nonnegative integer + * @see #forDigit(int, int) + * @see #digit(char, int) + * @see #isDigit(char) + * @since 1.5 + */ + public static int getNumericValue(int codePoint) + { + // If the code point is unassigned or in one of the private use areas + // then we delegate the call to the appropriate private static inner class. + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.getNumericValue(codePoint); + if (plane > 14) + return PrivateUseCharacters.getNumericValue(codePoint); + + // If the value N found in numValue[plane] is less than or equal to -3 + // then the numeric value was too big to fit into 16 bits and is + // stored in CharData.LARGENUMS at offset (-N - 3). + short num = (short)numValue[plane][readCodePoint(codePoint) >> 7]; + if (num <= -3) + return CharData.LARGENUMS[-num - 3]; + return num; } /** @@ -2124,7 +3834,23 @@ public final class Character implements Serializable, Comparable */ public static boolean isSpaceChar(char ch) { - return ((1 << getType(ch)) + return isSpaceChar((int)ch); + } + + /** + * Determines if a character is a Unicode space character. This includes + * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR. + * <br> + * Unicode space = [Zs]|[Zp]|[Zl] + * + * @param codePoint character to test + * @return true if ch is a Unicode space, else false + * @see #isWhitespace(char) + * @since 1.5 + */ + public static boolean isSpaceChar(int codePoint) + { + return ((1 << getType(codePoint)) & ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) | (1 << PARAGRAPH_SEPARATOR))) != 0; @@ -2149,13 +3875,41 @@ public final class Character implements Serializable, Comparable */ public static boolean isWhitespace(char ch) { - int attr = readChar(ch); + return isWhitespace((int) ch); + } + + /** + * Determines if a character is Java whitespace. This includes Unicode + * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and + * PARAGRAPH_SEPARATOR) except the non-breaking spaces + * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>); + * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>, + * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>, + * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>, + * and <code>'\u001F'</code>. + * <br> + * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F + * + * @param codePoint character to test + * @return true if ch is Java whitespace, else false + * @see #isSpaceChar(char) + * @since 1.5 + */ + public static boolean isWhitespace(int codePoint) + { + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.isWhiteSpace(codePoint); + if (plane > 14) + return PrivateUseCharacters.isWhiteSpace(codePoint); + + int attr = readCodePoint(codePoint); return ((((1 << (attr & TYPE_MASK)) & ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) | (1 << PARAGRAPH_SEPARATOR))) != 0) && (attr & NO_BREAK_MASK) == 0) - || (ch <= '\u001F' && ((1 << ch) + || (codePoint <= '\u001F' && ((1 << codePoint) & ((1 << '\t') | (1 << '\n') | (1 << '\u000B') @@ -2180,7 +3934,24 @@ public final class Character implements Serializable, Comparable */ public static boolean isISOControl(char ch) { - return getType(ch) == CONTROL; + return isISOControl((int)ch); + } + + /** + * Determines if the character is an ISO Control character. This is true + * if the code point is in the range [0, 0x001F] or if it is in the range + * [0x007F, 0x009F]. + * @param codePoint the character to check + * @return true if the character is in one of the above ranges + * + * @since 1.5 + */ + public static boolean isISOControl(int codePoint) + { + if ((codePoint >= 0 && codePoint <= 0x001F) + || (codePoint >= 0x007F && codePoint <= 0x009F)) + return true; + return false; } /** @@ -2222,7 +3993,58 @@ public final class Character implements Serializable, Comparable */ public static int getType(char ch) { - return readChar(ch) & TYPE_MASK; + return getType((int)ch); + } + + /** + * Returns the Unicode general category property of a character. + * + * @param codePoint character from which the general category property will + * be retrieved + * @return the character category property of ch as an integer + * @see #UNASSIGNED + * @see #UPPERCASE_LETTER + * @see #LOWERCASE_LETTER + * @see #TITLECASE_LETTER + * @see #MODIFIER_LETTER + * @see #OTHER_LETTER + * @see #NON_SPACING_MARK + * @see #ENCLOSING_MARK + * @see #COMBINING_SPACING_MARK + * @see #DECIMAL_DIGIT_NUMBER + * @see #LETTER_NUMBER + * @see #OTHER_NUMBER + * @see #SPACE_SEPARATOR + * @see #LINE_SEPARATOR + * @see #PARAGRAPH_SEPARATOR + * @see #CONTROL + * @see #FORMAT + * @see #PRIVATE_USE + * @see #SURROGATE + * @see #DASH_PUNCTUATION + * @see #START_PUNCTUATION + * @see #END_PUNCTUATION + * @see #CONNECTOR_PUNCTUATION + * @see #OTHER_PUNCTUATION + * @see #MATH_SYMBOL + * @see #CURRENCY_SYMBOL + * @see #MODIFIER_SYMBOL + * @see #INITIAL_QUOTE_PUNCTUATION + * @see #FINAL_QUOTE_PUNCTUATION + * + * @since 1.5 + */ + public static int getType(int codePoint) + { + // If the codePoint is unassigned or in one of the private use areas + // then we delegate the call to the appropriate private static inner class. + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.getType(codePoint); + if (plane > 14) + return PrivateUseCharacters.getType(codePoint); + + return readCodePoint(codePoint) & TYPE_MASK; } /** @@ -2279,9 +4101,51 @@ public final class Character implements Serializable, Comparable public static byte getDirectionality(char ch) { // The result will correctly be signed. - return (byte) (direction[readChar(ch) >> 7] >> 2); + return getDirectionality((int)ch); } - + + /** + * Returns the Unicode directionality property of the character. This + * is used in the visual ordering of text. + * + * @param codePoint the character to look up + * @return the directionality constant, or DIRECTIONALITY_UNDEFINED + * @see #DIRECTIONALITY_UNDEFINED + * @see #DIRECTIONALITY_LEFT_TO_RIGHT + * @see #DIRECTIONALITY_RIGHT_TO_LEFT + * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC + * @see #DIRECTIONALITY_EUROPEAN_NUMBER + * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR + * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR + * @see #DIRECTIONALITY_ARABIC_NUMBER + * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR + * @see #DIRECTIONALITY_NONSPACING_MARK + * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL + * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR + * @see #DIRECTIONALITY_SEGMENT_SEPARATOR + * @see #DIRECTIONALITY_WHITESPACE + * @see #DIRECTIONALITY_OTHER_NEUTRALS + * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING + * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE + * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING + * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE + * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT + * @since 1.5 + */ + public static byte getDirectionality(int codePoint) + { + // If the code point is unassigned or in one of the private use areas + // then we delegate the call to the appropriate private static inner class. + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.getDirectionality(codePoint); + if (plane > 14) + return PrivateUseCharacters.getDirectionality(codePoint); + + // The result will correctly be signed. + return (byte) (direction[plane][readCodePoint(codePoint) >> 7] >> 2); + } + /** * Determines whether the character is mirrored according to Unicode. For * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in @@ -2293,7 +4157,29 @@ public final class Character implements Serializable, Comparable */ public static boolean isMirrored(char ch) { - return (readChar(ch) & MIRROR_MASK) != 0; + return (readCodePoint((int)ch) & MIRROR_MASK) != 0; + } + + /** + * Determines whether the character is mirrored according to Unicode. For + * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in + * left-to-right text, but ')' in right-to-left text. + * + * @param codePoint the character to look up + * @return true if the character is mirrored + * @since 1.5 + */ + public static boolean isMirrored(int codePoint) + { + // If the code point is unassigned or part of one of the private use areas + // then we delegate the call to the appropriate private static inner class. + int plane = codePoint >>> 16; + if (plane > 2 && plane < 14) + return UnassignedCharacters.isMirrored(codePoint); + if (plane > 14) + return PrivateUseCharacters.isMirrored(codePoint); + + return (readCodePoint(codePoint) & MIRROR_MASK) != 0; } /** @@ -2374,6 +4260,9 @@ public final class Character implements Serializable, Comparable */ public static char[] toChars(int codePoint) { + if (!isValidCodePoint(codePoint)) + throw new IllegalArgumentException("Illegal Unicode code point : " + + codePoint); char[] result = new char[charCount(codePoint)]; int ignore = toChars(codePoint, result, 0); return result; @@ -2591,7 +4480,7 @@ public final class Character implements Serializable, Comparable */ public static int codePointAt(char[] chars, int index, int limit) { - if (index < 0 || index >= limit || limit < 0 || limit >= chars.length) + if (index < 0 || index >= limit || limit < 0 || limit > chars.length) throw new IndexOutOfBoundsException(); char high = chars[index]; if (! isHighSurrogate(high) || ++index >= limit) diff --git a/libjava/classpath/java/lang/ClassNotFoundException.java b/libjava/classpath/java/lang/ClassNotFoundException.java index 6b6ae949dd2..142bc5d0306 100644 --- a/libjava/classpath/java/lang/ClassNotFoundException.java +++ b/libjava/classpath/java/lang/ClassNotFoundException.java @@ -70,7 +70,7 @@ public class ClassNotFoundException extends Exception */ public ClassNotFoundException() { - this(null, null); + this(null); } /** @@ -81,7 +81,8 @@ public class ClassNotFoundException extends Exception */ public ClassNotFoundException(String s) { - this(s, null); + super(s); + ex = null; } /** diff --git a/libjava/classpath/java/lang/Math.java b/libjava/classpath/java/lang/Math.java index 08081e2523a..d7c8aa1c433 100644 --- a/libjava/classpath/java/lang/Math.java +++ b/libjava/classpath/java/lang/Math.java @@ -1,5 +1,5 @@ -/* java.lang.Math -- common mathematical functions, native allowed - Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. +/* java.lang.Math -- common mathematical functions, native allowed (VMMath) + Copyright (C) 1998, 2001, 2002, 2003, 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -52,26 +52,34 @@ import java.util.Random; * @author Paul Fisher * @author John Keiser * @author Eric Blake (ebb9@email.byu.edu) + * @author Andrew John Hughes (gnu_andrew@member.fsf.org) * @since 1.0 */ public final class Math { - /** - * Math is non-instantiable - */ - private Math() - { - } + // FIXME - This is here because we need to load the "javalang" system + // library somewhere late in the bootstrap cycle. We cannot do this + // from VMSystem or VMRuntime since those are used to actually load + // the library. This is mainly here because historically Math was + // late enough in the bootstrap cycle to start using System after it + // was initialized (called from the java.util classes). static { if (Configuration.INIT_LOAD_LIBRARY) { - System.loadLibrary("javalang"); + System.loadLibrary("javalang"); } } /** + * Math is non-instantiable + */ + private Math() + { + } + + /** * A random number generator, initialized on first use. */ private static Random rand; @@ -298,7 +306,10 @@ public final class Math * @param a the angle (in radians) * @return sin(a) */ - public static native double sin(double a); + public static double sin(double a) + { + return VMMath.sin(a); + } /** * The trigonometric function <em>cos</em>. The cosine of NaN or infinity is @@ -307,7 +318,10 @@ public final class Math * @param a the angle (in radians) * @return cos(a) */ - public static native double cos(double a); + public static double cos(double a) + { + return VMMath.cos(a); + } /** * The trigonometric function <em>tan</em>. The tangent of NaN or infinity @@ -317,7 +331,10 @@ public final class Math * @param a the angle (in radians) * @return tan(a) */ - public static native double tan(double a); + public static double tan(double a) + { + return VMMath.tan(a); + } /** * The trigonometric function <em>arcsin</em>. The range of angles returned @@ -328,7 +345,10 @@ public final class Math * @param a the sin to turn back into an angle * @return arcsin(a) */ - public static native double asin(double a); + public static double asin(double a) + { + return VMMath.asin(a); + } /** * The trigonometric function <em>arccos</em>. The range of angles returned @@ -339,7 +359,10 @@ public final class Math * @param a the cos to turn back into an angle * @return arccos(a) */ - public static native double acos(double a); + public static double acos(double a) + { + return VMMath.acos(a); + } /** * The trigonometric function <em>arcsin</em>. The range of angles returned @@ -351,7 +374,10 @@ public final class Math * @return arcsin(a) * @see #atan2(double, double) */ - public static native double atan(double a); + public static double atan(double a) + { + return VMMath.atan(a); + } /** * A special version of the trigonometric function <em>arctan</em>, for @@ -400,7 +426,10 @@ public final class Math * @return <em>theta</em> in the conversion of (x, y) to (r, theta) * @see #atan(double) */ - public static native double atan2(double y, double x); + public static double atan2(double y, double x) + { + return VMMath.atan2(y,x); + } /** * Take <em>e</em><sup>a</sup>. The opposite of <code>log()</code>. If the @@ -414,7 +443,10 @@ public final class Math * @see #log(double) * @see #pow(double, double) */ - public static native double exp(double a); + public static double exp(double a) + { + return VMMath.exp(a); + } /** * Take ln(a) (the natural log). The opposite of <code>exp()</code>. If the @@ -430,7 +462,10 @@ public final class Math * @return the natural log of <code>a</code> * @see #exp(double) */ - public static native double log(double a); + public static double log(double a) + { + return VMMath.log(a); + } /** * Take a square root. If the argument is NaN or negative, the result is @@ -438,13 +473,18 @@ public final class Math * infinity; and if the result is either zero, the result is the same. * This is accurate within the limits of doubles. * - * <p>For other roots, use pow(a, 1 / rootNumber). + * <p>For a cube root, use <code>cbrt</code>. For other roots, use + * <code>pow(a, 1 / rootNumber)</code>.</p> * * @param a the numeric argument * @return the square root of the argument + * @see #cbrt(double) * @see #pow(double, double) */ - public static native double sqrt(double a); + public static double sqrt(double a) + { + return VMMath.sqrt(a); + } /** * Raise a number to a power. Special cases:<ul> @@ -514,7 +554,10 @@ public final class Math * @param b the power to raise it to * @return a<sup>b</sup> */ - public static native double pow(double a, double b); + public static double pow(double a, double b) + { + return VMMath.pow(a,b); + } /** * Get the IEEE 754 floating point remainder on two numbers. This is the @@ -530,7 +573,10 @@ public final class Math * @return the IEEE 754-defined floating point remainder of x/y * @see #rint(double) */ - public static native double IEEEremainder(double x, double y); + public static double IEEEremainder(double x, double y) + { + return VMMath.IEEEremainder(x,y); + } /** * Take the nearest integer that is that is greater than or equal to the @@ -541,7 +587,10 @@ public final class Math * @param a the value to act upon * @return the nearest integer >= <code>a</code> */ - public static native double ceil(double a); + public static double ceil(double a) + { + return VMMath.ceil(a); + } /** * Take the nearest integer that is that is less than or equal to the @@ -551,7 +600,10 @@ public final class Math * @param a the value to act upon * @return the nearest integer <= <code>a</code> */ - public static native double floor(double a); + public static double floor(double a) + { + return VMMath.floor(a); + } /** * Take the nearest integer to the argument. If it is exactly between @@ -561,7 +613,10 @@ public final class Math * @param a the value to act upon * @return the nearest integer to <code>a</code> */ - public static native double rint(double a); + public static double rint(double a) + { + return VMMath.rint(a); + } /** * Take the nearest integer to the argument. This is equivalent to @@ -647,4 +702,250 @@ public final class Math { return (rads * 180) / PI; } + + /** + * <p> + * Take a cube root. If the argument is <code>NaN</code>, an infinity or + * zero, then the original value is returned. The returned result is + * within 1 ulp of the exact result. For a finite value, <code>x</code>, + * the cube root of <code>-x</code> is equal to the negation of the cube root + * of <code>x</code>. + * </p> + * <p> + * For a square root, use <code>sqrt</code>. For other roots, use + * <code>pow(a, 1 / rootNumber)</code>. + * </p> + * + * @param a the numeric argument + * @return the cube root of the argument + * @see #sqrt(double) + * @see #pow(double, double) + * @since 1.5 + */ + public static double cbrt(double a) + { + return VMMath.cbrt(a); + } + + /** + * <p> + * Returns the hyperbolic cosine of the given value. For a value, + * <code>x</code>, the hyperbolic cosine is <code>(e<sup>x</sup> + + * e<sup>-x</sup>)/2</code> + * with <code>e</code> being <a href="#E">Euler's number</a>. The returned + * result is within 2.5 ulps of the exact result. + * </p> + * <p> + * If the supplied value is <code>NaN</code>, then the original value is + * returned. For either infinity, positive infinity is returned. + * The hyperbolic cosine of zero is 1.0. + * </p> + * + * @param a the numeric argument + * @return the hyperbolic cosine of <code>a</code>. + * @since 1.5 + */ + public static double cosh(double a) + { + return VMMath.cosh(a); + } + + /** + * <p> + * Returns <code>e<sup>a</sup> - 1. For values close to 0, the + * result of <code>expm1(a) + 1</code> tend to be much closer to the + * exact result than simply <code>exp(x)</code>. The result is within + * 1 ulp of the exact result, and results are semi-monotonic. For finite + * inputs, the returned value is greater than or equal to -1.0. Once + * a result enters within half a ulp of this limit, the limit is returned. + * </p> + * <p> + * For <code>NaN</code>, positive infinity and zero, the original value + * is returned. Negative infinity returns a result of -1.0 (the limit). + * </p> + * + * @param a the numeric argument + * @return <code>e<sup>a</sup> - 1</code> + * @since 1.5 + */ + public static double expm1(double a) + { + return VMMath.expm1(a); + } + + /** + * <p> + * Returns the hypotenuse, <code>a<sup>2</sup> + b<sup>2</sup></code>, + * without intermediate overflow or underflow. The returned result is + * within 1 ulp of the exact result. If one parameter is held constant, + * then the result in the other parameter is semi-monotonic. + * </p> + * <p> + * If either of the arguments is an infinity, then the returned result + * is positive infinity. Otherwise, if either argument is <code>NaN</code>, + * then <code>NaN</code> is returned. + * </p> + * + * @param a the first parameter. + * @param b the second parameter. + * @return the hypotenuse matching the supplied parameters. + * @since 1.5 + */ + public static double hypot(double a, double b) + { + return VMMath.hypot(a,b); + } + + /** + * <p> + * Returns the base 10 logarithm of the supplied value. The returned + * result is within 1 ulp of the exact result, and the results are + * semi-monotonic. + * </p> + * <p> + * Arguments of either <code>NaN</code> or less than zero return + * <code>NaN</code>. An argument of positive infinity returns positive + * infinity. Negative infinity is returned if either positive or negative + * zero is supplied. Where the argument is the result of + * <code>10<sup>n</sup</code>, then <code>n</code> is returned. + * </p> + * + * @param a the numeric argument. + * @return the base 10 logarithm of <code>a</code>. + * @since 1.5 + */ + public static double log10(double a) + { + return VMMath.log10(a); + } + + /** + * <p> + * Returns the natural logarithm resulting from the sum of the argument, + * <code>a</code> and 1. For values close to 0, the + * result of <code>log1p(a)</code> tend to be much closer to the + * exact result than simply <code>log(1.0+a)</code>. The returned + * result is within 1 ulp of the exact result, and the results are + * semi-monotonic. + * </p> + * <p> + * Arguments of either <code>NaN</code> or less than -1 return + * <code>NaN</code>. An argument of positive infinity or zero + * returns the original argument. Negative infinity is returned from an + * argument of -1. + * </p> + * + * @param a the numeric argument. + * @return the natural logarithm of <code>a</code> + 1. + * @since 1.5 + */ + public static double log1p(double a) + { + return VMMath.log1p(a); + } + + /** + * <p> + * Returns the sign of the argument as follows: + * </p> + * <ul> + * <li>If <code>a</code> is greater than zero, the result is 1.0.</li> + * <li>If <code>a</code> is less than zero, the result is -1.0.</li> + * <li>If <code>a</code> is <code>NaN</code>, the result is <code>NaN</code>. + * <li>If <code>a</code> is positive or negative zero, the result is the + * same.</li> + * </ul> + * + * @param a the numeric argument. + * @return the sign of the argument. + * @since 1.5. + */ + public static double signum(double a) + { + if (Double.isNaN(a)) + return Double.NaN; + if (a > 0) + return 1.0; + if (a < 0) + return -1.0; + return a; + } + + /** + * <p> + * Returns the sign of the argument as follows: + * </p> + * <ul> + * <li>If <code>a</code> is greater than zero, the result is 1.0f.</li> + * <li>If <code>a</code> is less than zero, the result is -1.0f.</li> + * <li>If <code>a</code> is <code>NaN</code>, the result is <code>NaN</code>. + * <li>If <code>a</code> is positive or negative zero, the result is the + * same.</li> + * </ul> + * + * @param a the numeric argument. + * @return the sign of the argument. + * @since 1.5. + */ + public static float signum(float a) + { + if (Float.isNaN(a)) + return Float.NaN; + if (a > 0) + return 1.0f; + if (a < 0) + return -1.0f; + return a; + } + + /** + * <p> + * Returns the hyperbolic sine of the given value. For a value, + * <code>x</code>, the hyperbolic sine is <code>(e<sup>x</sup> - + * e<sup>-x</sup>)/2</code> + * with <code>e</code> being <a href="#E">Euler's number</a>. The returned + * result is within 2.5 ulps of the exact result. + * </p> + * <p> + * If the supplied value is <code>NaN</code>, an infinity or a zero, then the + * original value is returned. + * </p> + * + * @param a the numeric argument + * @return the hyperbolic sine of <code>a</code>. + * @since 1.5 + */ + public static double sinh(double a) + { + return VMMath.sinh(a); + } + + /** + * <p> + * Returns the hyperbolic tangent of the given value. For a value, + * <code>x</code>, the hyperbolic tangent is <code>(e<sup>x</sup> - + * e<sup>-x</sup>)/(e<sup>x</sup> + e<sup>-x</sup>)</code> + * (i.e. <code>sinh(a)/cosh(a)</code>) + * with <code>e</code> being <a href="#E">Euler's number</a>. The returned + * result is within 2.5 ulps of the exact result. The absolute value + * of the exact result is always less than 1. Computed results are thus + * less than or equal to 1 for finite arguments, with results within + * half a ulp of either positive or negative 1 returning the appropriate + * limit value (i.e. as if the argument was an infinity). + * </p> + * <p> + * If the supplied value is <code>NaN</code> or zero, then the original + * value is returned. Positive infinity returns +1.0 and negative infinity + * returns -1.0. + * </p> + * + * @param a the numeric argument + * @return the hyperbolic tangent of <code>a</code>. + * @since 1.5 + */ + public static double tanh(double a) + { + return VMMath.tanh(a); + } + } diff --git a/libjava/classpath/java/lang/String.java b/libjava/classpath/java/lang/String.java index 231afc77b92..199dfba3545 100644 --- a/libjava/classpath/java/lang/String.java +++ b/libjava/classpath/java/lang/String.java @@ -555,6 +555,49 @@ public final class String implements Serializable, Comparable, CharSequence } /** + * Creates a new String containing the characters represented in the + * given subarray of Unicode code points. + * @param codePoints the entire array of code points + * @param offset the start of the subarray + * @param count the length of the subarray + * + * @throws IllegalArgumentException if an invalid code point is found + * in the codePoints array + * @throws IndexOutOfBoundsException if offset is negative or offset + count + * is greater than the length of the array. + */ + public String(int[] codePoints, int offset, int count) + { + // FIXME: This implementation appears to give correct internal + // representation of the String because: + // - length() is correct + // - getting a char[] from toCharArray() and testing + // Character.codePointAt() on all the characters in that array gives + // the appropriate results + // however printing the String gives incorrect results. This may be + // due to printing method errors (such as incorrectly looping through + // the String one char at a time rather than one "character" at a time. + + if (offset < 0) + throw new IndexOutOfBoundsException(); + int end = offset + count; + int pos = 0; + // This creates a char array that is long enough for all of the code + // points to represent supplementary characters. This is more than likely + // a waste of storage, so we use it only temporarily and then copy the + // used portion into the value array. + char[] temp = new char[2 * codePoints.length]; + for (int i = offset; i < end; i++) + { + pos += Character.toChars(codePoints[i], temp, pos); + } + this.count = pos; + this.value = new char[pos]; + System.arraycopy(temp, 0, value, 0, pos); + this.offset = 0; + } + + /** * Returns the number of characters contained in this String. * * @return the length of this String @@ -1822,7 +1865,7 @@ public final class String implements Serializable, Comparable, CharSequence */ private static int upperCaseExpansion(char ch) { - return Character.direction[Character.readChar(ch) >> 7] & 3; + return Character.direction[0][Character.readCodePoint((int)ch) >> 7] & 3; } /** @@ -1918,4 +1961,29 @@ public final class String implements Serializable, Comparable, CharSequence } return result.toString(); } + + /** + * Return the index into this String that is offset from the given index by + * <code>codePointOffset</code> code points. + * @param index the index at which to start + * @param codePointOffset the number of code points to offset + * @return the index into this String that is <code>codePointOffset</code> + * code points offset from <code>index</code>. + * + * @throws IndexOutOfBoundsException if index is negative or larger than the + * length of this string. + * @throws IndexOutOfBoundsException if codePointOffset is positive and the + * substring starting with index has fewer than codePointOffset code points. + * @throws IndexOutOfBoundsException if codePointOffset is negative and the + * substring ending with index has fewer than (-codePointOffset) code points. + * @since 1.5 + */ + public int offsetByCodePoints(int index, int codePointOffset) + { + if (index < 0 || index > count) + throw new IndexOutOfBoundsException(); + + return Character.offsetByCodePoints(value, offset, count, offset + index, + codePointOffset); + } } diff --git a/libjava/classpath/java/lang/StringBuilder.java b/libjava/classpath/java/lang/StringBuilder.java index 470f1ba9ad5..01a83ca707b 100644 --- a/libjava/classpath/java/lang/StringBuilder.java +++ b/libjava/classpath/java/lang/StringBuilder.java @@ -1006,4 +1006,65 @@ public final class StringBuilder return false; return true; } + + /** + * Get the code point at the specified index. This is like #charAt(int), + * but if the character is the start of a surrogate pair, and the + * following character completes the pair, then the corresponding + * supplementary code point is returned. + * @param index the index of the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * @since 1.5 + */ + public int codePointAt(int index) + { + return Character.codePointAt(value, index, count); + } + + /** + * Get the code point before the specified index. This is like + * #codePointAt(int), but checks the characters at <code>index-1</code> and + * <code>index-2</code> to see if they form a supplementary code point. + * @param index the index just past the codepoint to get, starting at 0 + * @return the codepoint at the specified index + * @throws IndexOutOfBoundsException if index is negative or >= length() + * @since 1.5 + */ + public int codePointBefore(int index) + { + // Character.codePointBefore() doesn't perform this check. We + // could use the CharSequence overload, but this is just as easy. + if (index >= count) + throw new IndexOutOfBoundsException(); + return Character.codePointBefore(value, index, 1); + } + + /** + * Returns the number of Unicode code points in the specified sub sequence. + * Surrogate pairs count as one code point. + * @param beginIndex the start of the subarray + * @param endIndex the index after the last char in the subarray + * @return the number of code points + * @throws IndexOutOfBoundsException if beginIndex is less than zero or + * greater than endIndex or if endIndex is greater than the length of this + * StringBuilder + */ + public int codePointCount(int beginIndex,int endIndex) + { + if (beginIndex < 0 || beginIndex > endIndex || endIndex > count) + throw new IndexOutOfBoundsException("invalid indices: " + beginIndex + + ", " + endIndex); + return Character.codePointCount(value, beginIndex, endIndex - beginIndex); + } + + public void trimToSize() + { + if (count < value.length) + { + char[] newValue = new char[count]; + System.arraycopy(value, 0, newValue, 0, count); + value = newValue; + } + } } diff --git a/libjava/classpath/java/lang/System.java b/libjava/classpath/java/lang/System.java index ea84dba47e2..34bbfddc672 100644 --- a/libjava/classpath/java/lang/System.java +++ b/libjava/classpath/java/lang/System.java @@ -178,6 +178,23 @@ public final class System if (SecurityManager.current != null) SecurityManager.current.checkPermission (new RuntimePermission("setSecurityManager")); + + // java.security.Security's class initialiser loads and parses the + // policy files. If it hasn't been run already it will be run + // during the first permission check. That initialisation will + // fail if a very restrictive security manager is in force, so we + // preload it here. + if (SecurityManager.current == null) + { + try + { + Class.forName("java.security.Security"); + } + catch (ClassNotFoundException e) + { + } + } + SecurityManager.current = sm; } diff --git a/libjava/classpath/java/lang/Thread.java b/libjava/classpath/java/lang/Thread.java index 9afde5bfd03..76df1037bb5 100644 --- a/libjava/classpath/java/lang/Thread.java +++ b/libjava/classpath/java/lang/Thread.java @@ -906,7 +906,7 @@ public class Thread implements Runnable if (sm != null) { sm.checkAccess(this); - if (this != currentThread()) + if (this != currentThread() || !(t instanceof ThreadDeath)) sm.checkPermission(new RuntimePermission("stopThread")); } VMThread vt = vmThread; diff --git a/libjava/classpath/java/lang/reflect/Proxy.java b/libjava/classpath/java/lang/reflect/Proxy.java index 137cb5a48a6..94aa0bbb2a0 100644 --- a/libjava/classpath/java/lang/reflect/Proxy.java +++ b/libjava/classpath/java/lang/reflect/Proxy.java @@ -1,5 +1,5 @@ /* Proxy.java -- build a proxy class that implements reflected interfaces - Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -42,6 +42,7 @@ import gnu.java.lang.reflect.TypeSignature; import java.io.Serializable; import java.security.ProtectionDomain; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -732,6 +733,12 @@ public class Proxy implements Serializable int j = methods.length; while (--j >= 0) { + if (isCoreObjectMethod(methods[j])) + { + // In the case of an attempt to redefine a public non-final + // method of Object, we must skip it + continue; + } ProxySignature sig = new ProxySignature(methods[j]); ProxySignature old = (ProxySignature) method_set.put(sig, sig); if (old != null) @@ -752,6 +759,41 @@ public class Proxy implements Serializable } return data; } + + /** + * Checks whether the method is similar to a public non-final method of + * Object or not (i.e. with the same name and parameter types). Note that we + * can't rely, directly or indirectly (via Collection.contains) on + * Method.equals as it would also check the declaring class, what we do not + * want. We only want to check that the given method have the same signature + * as a core method (same name and parameter types) + * + * @param method the method to check + * @return whether the method has the same name and parameter types as + * Object.equals, Object.hashCode or Object.toString + * @see java.lang.Object#equals(Object) + * @see java.lang.Object#hashCode() + * @see java.lang.Object#toString() + */ + private static boolean isCoreObjectMethod(Method method) + { + String methodName = method.getName(); + if (methodName.equals("equals")) + { + return Arrays.equals(method.getParameterTypes(), + new Class[] { Object.class }); + } + if (methodName.equals("hashCode")) + { + return method.getParameterTypes().length == 0; + } + if (methodName.equals("toString")) + { + return method.getParameterTypes().length == 0; + } + return false; + } + } // class ProxyData /** |