summary refs log tree commit diff
path: root/Source/JavaScriptCore/icu/unicode/uchar.h
diff options
context:
space:
mode:
author Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/icu/unicode/uchar.h
parent 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download WebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/JavaScriptCore/icu/unicode/uchar.h')
-rw-r--r-- Source/JavaScriptCore/icu/unicode/uchar.h 329
1 files changed, 246 insertions, 83 deletions
diff --git a/Source/JavaScriptCore/icu/unicode/uchar.h b/Source/JavaScriptCore/icu/unicode/uchar.h
index 93aa66320..1a5b71b46 100644
--- a/Source/JavaScriptCore/icu/unicode/uchar.h
+++ b/Source/JavaScriptCore/icu/unicode/uchar.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -39,7 +39,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
-#define U_UNICODE_VERSION "6.0"
+#define U_UNICODE_VERSION "6.3"
/**
* \file
@@ -139,19 +139,6 @@ U_CDECL_BEGIN
*/
#define U_MASK(x) ((uint32_t)1<<(x))
-/*
- * !! Note: Several comments in this file are machine-read by the
- * genpname tool. These comments describe the correspondence between
- * icu enum constants and UCD entities. Do not delete them. Update
- * these comments as needed.
- *
- * Any comment of the form "/ *[name]* /" (spaces added) is such
- * a comment.
- *
- * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
- * name, which must match PropertyValueAliases.txt.
- */
-
/**
* Selection constants for Unicode properties.
* These constants are used in functions like u_hasBinaryProperty to select
@@ -172,9 +159,11 @@ U_CDECL_BEGIN
* @stable ICU 2.1
*/
typedef enum UProperty {
- /* See note !!. Comments of the form "Binary property Dash",
- "Enumerated property Script", "Double property Numeric_Value",
- and "String property Age" are read by genpname. */
+ /*
+ * Note: UProperty constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UCHAR_<Unicode property name>=<integer>,
+ */
/* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
@@ -491,8 +480,13 @@ typedef enum UProperty {
(http://www.unicode.org/reports/tr29/)
Returns UWordBreakValues values. @stable ICU 3.4 */
UCHAR_WORD_BREAK=0x1014,
+ /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+ Used in UAX #9: Unicode Bidirectional Algorithm
+ (http://www.unicode.org/reports/tr9/)
+ Returns UBidiPairedBracketType values. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
/** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
- UCHAR_INT_LIMIT=0x1015,
+ UCHAR_INT_LIMIT=0x1016,
/** Bitmask property General_Category_Mask.
This is the General_Category property returned as a bit mask.
@@ -527,9 +521,11 @@ typedef enum UProperty {
/** String property Case_Folding.
Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
UCHAR_CASE_FOLDING=0x4002,
- /** String property ISO_Comment.
- Corresponds to u_getISOComment. @stable ICU 2.4 */
+#ifndef U_HIDE_DEPRECATED_API
+ /** Deprecated string property ISO_Comment.
+ Corresponds to u_getISOComment. @deprecated ICU 49 */
UCHAR_ISO_COMMENT=0x4003,
+#endif /* U_HIDE_DEPRECATED_API */
/** String property Lowercase_Mapping.
Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
UCHAR_LOWERCASE_MAPPING=0x4004,
@@ -551,29 +547,33 @@ typedef enum UProperty {
/** String property Titlecase_Mapping.
Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
UCHAR_TITLECASE_MAPPING=0x400A,
+#ifndef U_HIDE_DEPRECATED_API
/** String property Unicode_1_Name.
- Corresponds to u_charName. @stable ICU 2.4 */
+ This property is of little practical value.
+ Beginning with ICU 49, ICU APIs return an empty string for this property.
+ Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
UCHAR_UNICODE_1_NAME=0x400B,
+#endif /* U_HIDE_DEPRECATED_API */
/** String property Uppercase_Mapping.
Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
UCHAR_UPPERCASE_MAPPING=0x400C,
+ /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
+ Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET=0x400D,
/** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
- UCHAR_STRING_LIMIT=0x400D,
+ UCHAR_STRING_LIMIT=0x400E,
- /** Provisional property Script_Extensions (new in Unicode 6.0).
- As a provisional property, it may be modified or removed
- in future versions of the Unicode Standard, and thus in ICU.
+ /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
Some characters are commonly used in multiple scripts.
For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
- @draft ICU 4.6 */
+ @stable ICU 4.6 */
UCHAR_SCRIPT_EXTENSIONS=0x7000,
- /** First constant for Unicode properties with unusual value types. @draft ICU 4.6 */
+ /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
/** One more than the last constant for Unicode properties with unusual value types.
- * @draft ICU 4.6 */
+ * @stable ICU 4.6 */
UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
-
/** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
UCHAR_INVALID_CODE = -1
} UProperty;
@@ -585,7 +585,12 @@ typedef enum UProperty {
*/
typedef enum UCharCategory
{
- /** See note !!. Comments of the form "Cn" are read by genpname. */
+ /*
+ * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 2-letter General_Category value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
/** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
U_UNASSIGNED = 0,
@@ -770,7 +775,12 @@ typedef enum UCharCategory
* @stable ICU 2.0
*/
typedef enum UCharDirection {
- /** See note !!. Comments of the form "EN" are read by genpname. */
+ /*
+ * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
/** L @stable ICU 2.0 */
U_LEFT_TO_RIGHT = 0,
@@ -810,21 +820,57 @@ typedef enum UCharDirection {
U_DIR_NON_SPACING_MARK = 17,
/** BN @stable ICU 2.0 */
U_BOUNDARY_NEUTRAL = 18,
+ /** FSI @stable ICU 52 */
+ U_FIRST_STRONG_ISOLATE = 19,
+ /** LRI @stable ICU 52 */
+ U_LEFT_TO_RIGHT_ISOLATE = 20,
+ /** RLI @stable ICU 52 */
+ U_RIGHT_TO_LEFT_ISOLATE = 21,
+ /** PDI @stable ICU 52 */
+ U_POP_DIRECTIONAL_ISOLATE = 22,
/** @stable ICU 2.0 */
U_CHAR_DIRECTION_COUNT
} UCharDirection;
/**
+ * Bidi Paired Bracket Type constants.
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @stable ICU 52
+ */
+typedef enum UBidiPairedBracketType {
+ /*
+ * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
+ */
+
+ /** Not a paired bracket. @stable ICU 52 */
+ U_BPT_NONE,
+ /** Open paired bracket. @stable ICU 52 */
+ U_BPT_OPEN,
+ /** Close paired bracket. @stable ICU 52 */
+ U_BPT_CLOSE,
+ /** @stable ICU 52 */
+ U_BPT_COUNT /* 3 */
+} UBidiPairedBracketType;
+
+/**
* Constants for Unicode blocks, see the Unicode Data file Blocks.txt
* @stable ICU 2.0
*/
enum UBlockCode {
+ /*
+ * Note: UBlockCode constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UBLOCK_<Unicode Block value name> = <integer>,
+ */
/** New No_Block value in Unicode 4. @stable ICU 2.6 */
UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
/** @stable ICU 2.0 */
- UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
+ UBLOCK_BASIC_LATIN = 1, /*[0000]*/
/** @stable ICU 2.0 */
UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
@@ -1061,7 +1107,7 @@ enum UBlockCode {
UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
/**
- * Same as UBLOCK_PRIVATE_USE_AREA.
+ * Same as UBLOCK_PRIVATE_USE.
* Until Unicode 3.1.1, the corresponding block name was "Private Use",
* and multiple code point ranges had this block.
* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
@@ -1069,9 +1115,9 @@ enum UBlockCode {
*
* @stable ICU 2.0
*/
- UBLOCK_PRIVATE_USE = 78,
+ UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
/**
- * Same as UBLOCK_PRIVATE_USE.
+ * Same as UBLOCK_PRIVATE_USE_AREA.
* Until Unicode 3.1.1, the corresponding block name was "Private Use",
* and multiple code point ranges had this block.
* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
@@ -1079,7 +1125,7 @@ enum UBlockCode {
*
* @stable ICU 2.0
*/
- UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/
+ UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
/** @stable ICU 2.0 */
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
@@ -1111,33 +1157,33 @@ enum UBlockCode {
/* New blocks in Unicode 3.1 */
/** @stable ICU 2.0 */
- UBLOCK_OLD_ITALIC = 88 , /*[10300]*/
+ UBLOCK_OLD_ITALIC = 88, /*[10300]*/
/** @stable ICU 2.0 */
- UBLOCK_GOTHIC = 89 , /*[10330]*/
+ UBLOCK_GOTHIC = 89, /*[10330]*/
/** @stable ICU 2.0 */
- UBLOCK_DESERET = 90 , /*[10400]*/
+ UBLOCK_DESERET = 90, /*[10400]*/
/** @stable ICU 2.0 */
- UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
+ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
/** @stable ICU 2.0 */
- UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
+ UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
/** @stable ICU 2.0 */
- UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93 , /*[1D400]*/
+ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
/** @stable ICU 2.0 */
- UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94 , /*[20000]*/
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/
/** @stable ICU 2.0 */
- UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
/** @stable ICU 2.0 */
UBLOCK_TAGS = 96, /*[E0000]*/
/* New blocks in Unicode 3.2 */
+ /** @stable ICU 3.0 */
+ UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
/**
* Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
* @stable ICU 2.2
*/
- UBLOCK_CYRILLIC_SUPPLEMENTARY = 97,
- /** @stable ICU 3.0 */
- UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
/** @stable ICU 2.2 */
UBLOCK_TAGALOG = 98, /*[1700]*/
/** @stable ICU 2.2 */
@@ -1381,8 +1427,33 @@ enum UBlockCode {
/** @stable ICU 4.6 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/
- /** @stable ICU 2.0 */
- UBLOCK_COUNT = 210,
+ /* New blocks in Unicode 6.1 */
+
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
+ /** @stable ICU 49 */
+ UBLOCK_CHAKMA = 212, /*[11100]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
+ /** @stable ICU 49 */
+ UBLOCK_MIAO = 216, /*[16F00]*/
+ /** @stable ICU 49 */
+ UBLOCK_SHARADA = 217, /*[11180]*/
+ /** @stable ICU 49 */
+ UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
+ /** @stable ICU 49 */
+ UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
+ /** @stable ICU 49 */
+ UBLOCK_TAKRI = 220, /*[11680]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COUNT = 221,
/** @stable ICU 2.0 */
UBLOCK_INVALID_CODE=-1
@@ -1399,7 +1470,13 @@ typedef enum UBlockCode UBlockCode;
* @stable ICU 2.2
*/
typedef enum UEastAsianWidth {
- U_EA_NEUTRAL, /*[N]*/ /*See note !!*/
+ /*
+ * Note: UEastAsianWidth constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_EA_<Unicode East_Asian_Width value name>
+ */
+
+ U_EA_NEUTRAL, /*[N]*/
U_EA_AMBIGUOUS, /*[A]*/
U_EA_HALFWIDTH, /*[H]*/
U_EA_FULLWIDTH, /*[F]*/
@@ -1407,10 +1484,6 @@ typedef enum UEastAsianWidth {
U_EA_WIDE, /*[W]*/
U_EA_COUNT
} UEastAsianWidth;
-/*
- * Implementation note:
- * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
- */
/**
* Selector constants for u_charName().
@@ -1424,10 +1497,21 @@ typedef enum UEastAsianWidth {
* @stable ICU 2.0
*/
typedef enum UCharNameChoice {
+ /** Unicode character name (Name property). @stable ICU 2.0 */
U_UNICODE_CHAR_NAME,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * The Unicode_1_Name property value which is of little practical value.
+ * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
+ * @deprecated ICU 49
+ */
U_UNICODE_10_CHAR_NAME,
- U_EXTENDED_CHAR_NAME,
- U_CHAR_NAME_ALIAS, /**< Corrected name from NameAliases.txt. @stable ICU 4.4 */
+#endif /* U_HIDE_DEPRECATED_API */
+ /** Standard or synthetic character name. @stable ICU 2.0 */
+ U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
+ /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
+ U_CHAR_NAME_ALIAS,
+ /** @stable ICU 2.0 */
U_CHAR_NAME_CHOICE_COUNT
} UCharNameChoice;
@@ -1457,7 +1541,13 @@ typedef enum UPropertyNameChoice {
* @stable ICU 2.2
*/
typedef enum UDecompositionType {
- U_DT_NONE, /*[none]*/ /*See note !!*/
+ /*
+ * Note: UDecompositionType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_DT_<Unicode Decomposition_Type value name>
+ */
+
+ U_DT_NONE, /*[none]*/
U_DT_CANONICAL, /*[can]*/
U_DT_COMPAT, /*[com]*/
U_DT_CIRCLE, /*[enc]*/
@@ -1485,7 +1575,13 @@ typedef enum UDecompositionType {
* @stable ICU 2.2
*/
typedef enum UJoiningType {
- U_JT_NON_JOINING, /*[U]*/ /*See note !!*/
+ /*
+ * Note: UJoiningType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JT_<Unicode Joining_Type value name>
+ */
+
+ U_JT_NON_JOINING, /*[U]*/
U_JT_JOIN_CAUSING, /*[C]*/
U_JT_DUAL_JOINING, /*[D]*/
U_JT_LEFT_JOINING, /*[L]*/
@@ -1501,6 +1597,12 @@ typedef enum UJoiningType {
* @stable ICU 2.2
*/
typedef enum UJoiningGroup {
+ /*
+ * Note: UJoiningGroup constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JG_<Unicode Joining_Group value name>
+ */
+
U_JG_NO_JOINING_GROUP,
U_JG_AIN,
U_JG_ALAPH,
@@ -1559,6 +1661,7 @@ typedef enum UJoiningGroup {
U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
U_JG_NYA, /**< @stable ICU 4.4 */
+ U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */
U_JG_COUNT
} UJoiningGroup;
@@ -1569,7 +1672,13 @@ typedef enum UJoiningGroup {
* @stable ICU 3.4
*/
typedef enum UGraphemeClusterBreak {
- U_GCB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_GCB_<Unicode Grapheme_Cluster_Break value name>
+ */
+
+ U_GCB_OTHER = 0, /*[XX]*/
U_GCB_CONTROL = 1, /*[CN]*/
U_GCB_CR = 2, /*[CR]*/
U_GCB_EXTEND = 3, /*[EX]*/
@@ -1581,7 +1690,8 @@ typedef enum UGraphemeClusterBreak {
U_GCB_V = 9, /*[V]*/
U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
U_GCB_PREPEND = 11, /*[PP]*/
- U_GCB_COUNT = 12
+ U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_GCB_COUNT = 13
} UGraphemeClusterBreak;
/**
@@ -1592,7 +1702,13 @@ typedef enum UGraphemeClusterBreak {
* @stable ICU 3.4
*/
typedef enum UWordBreakValues {
- U_WB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: UWordBreakValues constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_WB_<Unicode Word_Break value name>
+ */
+
+ U_WB_OTHER = 0, /*[XX]*/
U_WB_ALETTER = 1, /*[LE]*/
U_WB_FORMAT = 2, /*[FO]*/
U_WB_KATAKANA = 3, /*[KA]*/
@@ -1605,7 +1721,11 @@ typedef enum UWordBreakValues {
U_WB_LF = 10, /*[LF]*/
U_WB_MIDNUMLET =11, /*[MB]*/
U_WB_NEWLINE =12, /*[NL]*/
- U_WB_COUNT = 13
+ U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
+ U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
+ U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
+ U_WB_COUNT = 17
} UWordBreakValues;
/**
@@ -1615,7 +1735,13 @@ typedef enum UWordBreakValues {
* @stable ICU 3.4
*/
typedef enum USentenceBreak {
- U_SB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: USentenceBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_SB_<Unicode Sentence_Break value name>
+ */
+
+ U_SB_OTHER = 0, /*[XX]*/
U_SB_ATERM = 1, /*[AT]*/
U_SB_CLOSE = 2, /*[CL]*/
U_SB_FORMAT = 3, /*[FO]*/
@@ -1640,7 +1766,13 @@ typedef enum USentenceBreak {
* @stable ICU 2.2
*/
typedef enum ULineBreak {
- U_LB_UNKNOWN = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: ULineBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_LB_<Unicode Line_Break value name>
+ */
+
+ U_LB_UNKNOWN = 0, /*[XX]*/
U_LB_AMBIGUOUS = 1, /*[AI]*/
U_LB_ALPHABETIC = 2, /*[AL]*/
U_LB_BREAK_BOTH = 3, /*[B2]*/
@@ -1655,9 +1787,9 @@ typedef enum ULineBreak {
U_LB_GLUE = 12, /*[GL]*/
U_LB_HYPHEN = 13, /*[HY]*/
U_LB_IDEOGRAPHIC = 14, /*[ID]*/
- U_LB_INSEPERABLE = 15,
/** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
- U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
+ U_LB_INSEPARABLE = 15, /*[IN]*/
+ U_LB_INSEPERABLE = U_LB_INSEPARABLE,
U_LB_INFIX_NUMERIC = 16, /*[IS]*/
U_LB_LINE_FEED = 17, /*[LF]*/
U_LB_NONSTARTER = 18, /*[NS]*/
@@ -1679,7 +1811,10 @@ typedef enum ULineBreak {
U_LB_JT = 34, /*[JT]*/
U_LB_JV = 35, /*[JV]*/
U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
- U_LB_COUNT = 37
+ U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
+ U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
+ U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_LB_COUNT = 40
} ULineBreak;
/**
@@ -1689,7 +1824,13 @@ typedef enum ULineBreak {
* @stable ICU 2.2
*/
typedef enum UNumericType {
- U_NT_NONE, /*[None]*/ /*See note !!*/
+ /*
+ * Note: UNumericType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_NT_<Unicode Numeric_Type value name>
+ */
+
+ U_NT_NONE, /*[None]*/
U_NT_DECIMAL, /*[de]*/
U_NT_DIGIT, /*[di]*/
U_NT_NUMERIC, /*[nu]*/
@@ -1703,7 +1844,13 @@ typedef enum UNumericType {
* @stable ICU 2.6
*/
typedef enum UHangulSyllableType {
- U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/
+ /*
+ * Note: UHangulSyllableType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_HST_<Unicode Hangul_Syllable_Type value name>
+ */
+
+ U_HST_NOT_APPLICABLE, /*[NA]*/
U_HST_LEADING_JAMO, /*[L]*/
U_HST_VOWEL_JAMO, /*[V]*/
U_HST_TRAILING_JAMO, /*[T]*/
@@ -1906,6 +2053,8 @@ u_getIntPropertyMaxValue(UProperty which);
*
* For characters without any numeric values in the Unicode Character Database,
* this function will return U_NO_NUMERIC_VALUE.
+ * Note: This is different from the Unicode Standard which specifies NaN as the default value.
+ * (NaN is not available on all platforms.)
*
* Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
* also supports negative values, large values, and fractions,
@@ -2379,7 +2528,7 @@ u_isMirrored(UChar32 c);
* as the mirror-image of the default glyph of the specified
* character. This is useful for text conversion to and from
* codepages with visual order, and for displays without glyph
- * selecetion capabilities.
+ * selection capabilities.
*
* @param c the code point to be mapped
* @return another Unicode code point that may serve as a mirror-image
@@ -2394,6 +2543,25 @@ U_STABLE UChar32 U_EXPORT2
u_charMirror(UChar32 c);
/**
+ * Maps the specified character to its paired bracket character.
+ * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
+ * Otherwise c itself is returned.
+ * See http://www.unicode.org/reports/tr9/
+ *
+ * @param c the code point to be mapped
+ * @return the paired bracket code point,
+ * or c itself if there is no such mapping
+ * (Bidi_Paired_Bracket_Type=None)
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @see u_charMirror
+ * @stable ICU 52
+ */
+U_STABLE UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c);
+
+/**
* Returns the general category value for the code point.
*
* Same as java.lang.Character.getType().
@@ -2553,13 +2721,11 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
char *buffer, int32_t bufferLength,
UErrorCode *pErrorCode);
+#ifndef U_HIDE_DEPRECATED_API
/**
- * Get the ISO 10646 comment for a character.
- * The ISO 10646 comment is an informative field in the Unicode Character
- * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
- *
- * Note: Unicode 5.2 removes all ISO comment data, resulting in empty strings
- * returned for all characters.
+ * Returns an empty string.
+ * Used to return the ISO 10646 comment for a character.
+ * The Unicode ISO_Comment property is deprecated and has no values.
*
* @param c The character (code point) for which to get the ISO comment.
* It must be <code>0<=c<=0x10ffff</code>.
@@ -2570,18 +2736,15 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
* @param pErrorCode Pointer to a UErrorCode variable;
* check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
* returns.
- * @return The length of the comment, or 0 if there is no comment for this character.
- * If the destCapacity is less than or equal to the length, then the buffer
- * contains the truncated name and the returned length indicates the full
- * length of the name.
- * The length does not include the zero-termination.
+ * @return 0
*
- * @stable ICU 2.2
+ * @deprecated ICU 49
*/
U_STABLE int32_t U_EXPORT2
u_getISOComment(UChar32 c,
char *dest, int32_t destCapacity,
UErrorCode *pErrorCode);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Find a Unicode character by its name and return its code point value.