diff options
-rw-r--r-- | src/corelib/tools/qchar.cpp | 9 | ||||
-rw-r--r-- | src/corelib/tools/qchar.h | 11 | ||||
-rw-r--r-- | src/corelib/tools/qunicodetools.cpp | 61 | ||||
-rw-r--r-- | src/gui/text/qharfbuzzng.cpp | 10 | ||||
-rw-r--r-- | src/platformsupport/fontdatabases/fontconfig/qfontconfigdatabase.cpp | 8 | ||||
-rw-r--r-- | tests/auto/corelib/tools/qchar/tst_qchar.cpp | 4 | ||||
-rw-r--r-- | tests/manual/diaglib/textdump.cpp | 12 | ||||
-rw-r--r-- | util/unicode/main.cpp | 17 |
8 files changed, 101 insertions, 31 deletions
diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp index 5469eee14d..9d2d90fc0d 100644 --- a/src/corelib/tools/qchar.cpp +++ b/src/corelib/tools/qchar.cpp @@ -179,8 +179,9 @@ QT_BEGIN_NAMESPACE \value Unicode_6_2 Version 6.2 \value Unicode_6_3 Version 6.3 Since Qt 5.3 \value Unicode_7_0 Version 7.0 Since Qt 5.5 + \value Unicode_8_0 Version 8.0 Since Qt 5.6 \value Unicode_Unassigned The value is not assigned to any character - in version 6.3 of Unicode. + in version 8.0 of Unicode. \sa unicodeVersion(), currentUnicodeVersion() */ @@ -401,6 +402,12 @@ QT_BEGIN_NAMESPACE \value Script_Khudawadi \value Script_Tirhuta \value Script_WarangCiti + \value Script_Ahom + \value Script_AnatolianHieroglyphs + \value Script_Hatran + \value Script_Multani + \value Script_OldHungarian + \value Script_SignWriting \omitvalue ScriptCount diff --git a/src/corelib/tools/qchar.h b/src/corelib/tools/qchar.h index fc1fc7bc16..9833380cc8 100644 --- a/src/corelib/tools/qchar.h +++ b/src/corelib/tools/qchar.h @@ -275,6 +275,14 @@ public: Script_Tirhuta, Script_WarangCiti, + // Unicode 8.0 additions + Script_Ahom, + Script_AnatolianHieroglyphs, + Script_Hatran, + Script_Multani, + Script_OldHungarian, + Script_SignWriting, + ScriptCount }; @@ -365,7 +373,8 @@ public: Unicode_6_1, Unicode_6_2, Unicode_6_3, - Unicode_7_0 + Unicode_7_0, + Unicode_8_0 }; // ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp index 56576c69f1..0df6b01b13 100644 --- a/src/corelib/tools/qunicodetools.cpp +++ b/src/corelib/tools/qunicodetools.cpp @@ -49,7 +49,7 @@ namespace QUnicodeTools { // ----------------------------------------------------------------------------------------------------- // // The text boundaries determination algorithm. 
-// See http://www.unicode.org/reports/tr29/tr29-25.html +// See http://www.unicode.org/reports/tr29/tr29-27.html // // ----------------------------------------------------------------------------------------------------- @@ -244,8 +244,9 @@ namespace SB { enum State { Initial, + Lower, Upper, - UpATerm, + LUATerm, ATerm, ATermC, ACS, @@ -260,10 +261,11 @@ enum State { static const uchar breakTable[BAfter + 1][QUnicodeTables::SentenceBreak_Close + 1] = { // Other CR LF Sep Extend Sp Lower Upper OLetter Numeric ATerm SContinue STerm Close - { Initial, BAfterC, BAfter , BAfter , Initial, Initial, Initial, Upper , Initial, Initial, ATerm , Initial, STerm , Initial }, // Initial - { Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, UpATerm, STerm , STerm , Initial }, // Upper + { Initial, BAfterC, BAfter , BAfter , Initial, Initial, Lower , Upper , Initial, Initial, ATerm , Initial, STerm , Initial }, // Initial + { Initial, BAfterC, BAfter , BAfter , Lower , Initial, Initial, Initial, Initial, Initial, LUATerm, Initial, STerm , Initial }, // Lower + { Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, LUATerm, STerm , STerm , Initial }, // Upper - { Lookup , BAfterC, BAfter , BAfter , UpATerm, ACS , Initial, Upper , Break , Initial, ATerm , STerm , STerm , ATermC }, // UpATerm + { Lookup , BAfterC, BAfter , BAfter , LUATerm, ACS , Initial, Upper , Break , Initial, ATerm , STerm , STerm , ATermC }, // LUATerm { Lookup , BAfterC, BAfter , BAfter , ATerm , ACS , Initial, Break , Break , Initial, ATerm , STerm , STerm , ATermC }, // ATerm { Lookup , BAfterC, BAfter , BAfter , ATermC , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , ATermC }, // ATermC { Lookup , BAfterC, BAfter , BAfter , ACS , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , Lookup }, // ACS @@ -341,7 +343,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes // 
----------------------------------------------------------------------------------------------------- // // The line breaking algorithm. -// See http://www.unicode.org/reports/tr14/tr14-33.html +// See http://www.unicode.org/reports/tr14/tr14-35.html // // ----------------------------------------------------------------------------------------------------- @@ -408,26 +410,29 @@ inline Class toClass(QUnicodeTables::LineBreakClass lbc, QChar::Category categor /* In order to support the tailored implementation of LB25 properly the following changes were made in the pair table to allow breaks where the numeric expression doesn't match the template (i.e. [^NU](IS|SY)NU): - CL->PO from IB to DB - CP->PO from IB to DB - CL->PR from IB to DB - CP->PR from IB to DB - PO->OP from IB to DB - PR->OP from IB to DB - IS->NU from IB to DB - SY->NU from IB to DB + (CL)(PO) from IB to DB + (CP)(PO) from IB to DB + (CL)(PR) from IB to DB + (CP)(PR) from IB to DB + (PO)(OP) from IB to DB + (PR)(OP) from IB to DB + (IS)(NU) from IB to DB + (SY)(NU) from IB to DB */ -// The following line break classes are not treated by the pair table -// and must be resolved outside: -// AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX +/* In order to implement LB21a properly a special rule HH has been introduced and + the following changes were made in the pair table to disallow breaks after Hebrew + Hyphen: + (HL)(HY|BA) from IB to CI + (HY|BA)(!CB) from DB to HH +*/ enum Action { ProhibitedBreak, PB = ProhibitedBreak, DirectBreak, DB = DirectBreak, IndirectBreak, IB = IndirectBreak, CombiningIndirectBreak, CI = CombiningIndirectBreak, - CombiningProhibitedBreak, CP = CombiningProhibitedBreak + CombiningProhibitedBreak, CP = CombiningProhibitedBreak, + ProhibitedBreakAfterHebrewPlusHyphen, HH = ProhibitedBreakAfterHebrewPlusHyphen }; static const uchar breakTable[QUnicodeTables::LineBreak_CB + 1][QUnicodeTables::LineBreak_CB + 1] = { @@ -438,18 +443,18 @@ static const uchar 
breakTable[QUnicodeTables::LineBreak_CB + 1][QUnicodeTables:: /* QU */ { PB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, IB }, /* GL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, IB }, /* NS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, -/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, -/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, +/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, +/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, /* IS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, /* PR */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB, DB, DB }, /* PO */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, /* NU */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, /* AL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, -/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, +/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, CI, CI, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, /* ID */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, 
DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, /* IN */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, -/* HY */ { DB, PB, PB, IB, DB, IB, PB, PB, PB, DB, DB, IB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, -/* BA */ { DB, PB, PB, IB, DB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, +/* HY */ { HH, PB, PB, IB, HH, IB, PB, PB, PB, HH, HH, IB, HH, HH, HH, HH, IB, IB, HH, HH, PB, CI, PB, HH, HH, HH, HH, HH, HH, DB }, +/* BA */ { HH, PB, PB, IB, HH, IB, PB, PB, PB, HH, HH, HH, HH, HH, HH, HH, IB, IB, HH, HH, PB, CI, PB, HH, HH, HH, HH, HH, HH, DB }, /* BB */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, DB }, /* B2 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB }, /* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB }, @@ -464,6 +469,10 @@ static const uchar breakTable[QUnicodeTables::LineBreak_CB + 1][QUnicodeTables:: /* CB */ { DB, PB, PB, IB, IB, DB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB } }; +// The following line break classes are not treated by the pair table +// and must be resolved outside: +// AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX + } // namespace LB static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *attributes) @@ -555,6 +564,10 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at if (lcls != QUnicodeTables::LineBreak_SP) goto next_no_cls_update; break; + case LB::ProhibitedBreakAfterHebrewPlusHyphen: + if (lcls != QUnicodeTables::LineBreak_HL) + attributes[pos].lineBreak = true; + break; case LB::ProhibitedBreak: // nothing to do default: 
@@ -659,7 +672,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length, // ---------------------------------------------------------------------------- // -// The Unicode script property. See http://www.unicode.org/reports/tr24/tr24-22.html +// The Unicode script property. See http://www.unicode.org/reports/tr24/tr24-24.html // // ---------------------------------------------------------------------------- diff --git a/src/gui/text/qharfbuzzng.cpp b/src/gui/text/qharfbuzzng.cpp index 102c62ea8a..b2edfc00a0 100644 --- a/src/gui/text/qharfbuzzng.cpp +++ b/src/gui/text/qharfbuzzng.cpp @@ -188,7 +188,15 @@ static const hb_script_t _qtscript_to_hbscript[] = { HB_SCRIPT_SIDDHAM, HB_SCRIPT_KHUDAWADI, HB_SCRIPT_TIRHUTA, - HB_SCRIPT_WARANG_CITI + HB_SCRIPT_WARANG_CITI, + + // Unicode 8.0 additions + HB_SCRIPT_AHOM, + HB_SCRIPT_ANATOLIAN_HIEROGLYPHS, + HB_SCRIPT_HATRAN, + HB_SCRIPT_MULTANI, + HB_SCRIPT_OLD_HUNGARIAN, + HB_SCRIPT_SIGNWRITING }; Q_STATIC_ASSERT(QChar::ScriptCount == sizeof(_qtscript_to_hbscript) / sizeof(_qtscript_to_hbscript[0])); diff --git a/src/platformsupport/fontdatabases/fontconfig/qfontconfigdatabase.cpp b/src/platformsupport/fontdatabases/fontconfig/qfontconfigdatabase.cpp index 022bc8bec2..0af9440b50 100644 --- a/src/platformsupport/fontdatabases/fontconfig/qfontconfigdatabase.cpp +++ b/src/platformsupport/fontdatabases/fontconfig/qfontconfigdatabase.cpp @@ -242,7 +242,13 @@ static const char *specialLanguages[] = { "sa", // Siddham "sd", // Khudawadi "mai", // Tirhuta - "hoc" // WarangCiti + "hoc", // WarangCiti + "", // Ahom + "", // AnatolianHieroglyphs + "", // Hatran + "", // Multani + "", // OldHungarian + "" // SignWriting }; Q_STATIC_ASSERT(sizeof(specialLanguages) / sizeof(const char *) == QChar::ScriptCount); diff --git a/tests/auto/corelib/tools/qchar/tst_qchar.cpp b/tests/auto/corelib/tools/qchar/tst_qchar.cpp index f80d6e6d93..22fbce9f8e 100644 --- a/tests/auto/corelib/tools/qchar/tst_qchar.cpp +++ 
b/tests/auto/corelib/tools/qchar/tst_qchar.cpp @@ -594,6 +594,10 @@ void tst_QChar::unicodeVersion() QVERIFY(QChar::unicodeVersion(0x20bd) == QChar::Unicode_7_0); QVERIFY(QChar::unicodeVersion(0x16b00) == QChar::Unicode_7_0); + QVERIFY(QChar(0x08b3).unicodeVersion() == QChar::Unicode_8_0); + QVERIFY(QChar::unicodeVersion(0x08b3) == QChar::Unicode_8_0); + QVERIFY(QChar::unicodeVersion(0x108e0) == QChar::Unicode_8_0); + QVERIFY(QChar(0x09ff).unicodeVersion() == QChar::Unicode_Unassigned); QVERIFY(QChar::unicodeVersion(0x09ff) == QChar::Unicode_Unassigned); QVERIFY(QChar::unicodeVersion(0x110000) == QChar::Unicode_Unassigned); diff --git a/tests/manual/diaglib/textdump.cpp b/tests/manual/diaglib/textdump.cpp index ed4d5021be..0f69166a43 100644 --- a/tests/manual/diaglib/textdump.cpp +++ b/tests/manual/diaglib/textdump.cpp @@ -248,6 +248,15 @@ static const EnumLookup scriptEnumLookup[] = {QChar::Script_Tirhuta, "Script_Tirhuta"}, {QChar::Script_WarangCiti, "Script_WarangCiti"}, #endif // Qt 5.5 + +#if QT_VERSION >= 0x050600 + {QChar::Script_Ahom, "Script_Ahom"}, + {QChar::Script_AnatolianHieroglyphs, "Script_AnatolianHieroglyphs"}, + {QChar::Script_Hatran, "Script_Hatran"}, + {QChar::Script_Multani, "Script_Multani"}, + {QChar::Script_OldHungarian, "Script_OldHungarian"}, + {QChar::Script_SignWriting, "Script_SignWriting"}, +#endif // Qt 5.6 }; #endif // Qt 5.1 @@ -364,6 +373,9 @@ static const EnumLookup unicodeVersionEnumLookup[] = #if QT_VERSION >= 0x050500 {QChar::Unicode_7_0, "Unicode_7_0"}, #endif // Qt 5.5 +#if QT_VERSION >= 0x050600 + {QChar::Unicode_8_0, "Unicode_8_0"}, +#endif // Qt 5.6 #endif // Qt 5 }; diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 2f6e28bd9f..ab37bffe87 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -43,8 +43,8 @@ #include <private/qunicodetables_p.h> #endif -#define DATA_VERSION_S "7.0" -#define DATA_VERSION_STR 
"QChar::Unicode_8_0" static QHash<QByteArray, QChar::UnicodeVersion> age_map; @@ -71,6 +71,7 @@ static void initAgeMap() { QChar::Unicode_6_2, "6.2" }, { QChar::Unicode_6_3, "6.3" }, { QChar::Unicode_7_0, "7.0" }, + { QChar::Unicode_8_0, "8.0" }, { QChar::Unicode_Unassigned, 0 } }; AgeMap *d = ageMap; @@ -719,6 +720,13 @@ static void initScriptMap() { QChar::Script_Khudawadi, "Khudawadi" }, { QChar::Script_Tirhuta, "Tirhuta" }, { QChar::Script_WarangCiti, "WarangCiti" }, + // 8.0 + { QChar::Script_Ahom, "Ahom" }, + { QChar::Script_AnatolianHieroglyphs, "AnatolianHieroglyphs" }, + { QChar::Script_Hatran, "Hatran" }, + { QChar::Script_Multani, "Multani" }, + { QChar::Script_OldHungarian, "OldHungarian" }, + { QChar::Script_SignWriting, "SignWriting" }, // unhandled { QChar::Script_Unknown, 0 } }; @@ -946,13 +954,16 @@ struct UnicodeData { p.lineBreakClass = LineBreak_AL; // XX -> AL // LineBreak.txt // The unassigned code points that default to "ID" include ranges in the following blocks: - // [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2F800..U+2FA1F, U+20000..U+2FFFD, U+30000..U+3FFFD] + // [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2B820..U+2CEAF, U+2F800..U+2FA1F] + // and any other reserved code points on + // [U+20000..U+2FFFD, U+30000..U+3FFFD] if ((codepoint >= 0x3400 && codepoint <= 0x4DBF) || (codepoint >= 0x4E00 && codepoint <= 0x9FFF) || (codepoint >= 0xF900 && codepoint <= 0xFAFF) || (codepoint >= 0x20000 && codepoint <= 0x2A6DF) || (codepoint >= 0x2A700 && codepoint <= 0x2B73F) || (codepoint >= 0x2B740 && codepoint <= 0x2B81F) + || (codepoint >= 0x2B820 && codepoint <= 0x2CEAF) || (codepoint >= 0x2F800 && codepoint <= 0x2FA1F) || (codepoint >= 0x20000 && codepoint <= 0x2FFFD) || (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) { |