From a503e0bc274888e915301ebf881a2ea61fcb264d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Minh=20Nguye=CC=82=CC=83n?= Date: Fri, 30 Jun 2017 00:36:40 -0700 Subject: [core] Updated script detection for Unicode 10 Updated script detection code to reflect changes in Unicode 10 and UTR 50 revision 17. --- src/mbgl/util/i18n.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/mbgl/util/i18n.cpp b/src/mbgl/util/i18n.cpp index ada6f6526c..16f1d669f3 100644 --- a/src/mbgl/util/i18n.cpp +++ b/src/mbgl/util/i18n.cpp @@ -15,7 +15,7 @@ namespace { return codepoint >= first && codepoint <= last; \ } -// The following table comes from . +// The following table comes from . // Keep it synchronized with . // DEFINE_IS_IN_UNICODE_BLOCK(BasicLatin, 0x0000, 0x007F) @@ -37,6 +37,7 @@ DEFINE_IS_IN_UNICODE_BLOCK(ArabicSupplement, 0x0750, 0x077F) // DEFINE_IS_IN_UNICODE_BLOCK(NKo, 0x07C0, 0x07FF) // DEFINE_IS_IN_UNICODE_BLOCK(Samaritan, 0x0800, 0x083F) // DEFINE_IS_IN_UNICODE_BLOCK(Mandaic, 0x0840, 0x085F) +// DEFINE_IS_IN_UNICODE_BLOCK(SyriacSupplement, 0x0860, 0x086F) DEFINE_IS_IN_UNICODE_BLOCK(ArabicExtendedA, 0x08A0, 0x08FF) // DEFINE_IS_IN_UNICODE_BLOCK(Devanagari, 0x0900, 0x097F) // DEFINE_IS_IN_UNICODE_BLOCK(Bengali, 0x0980, 0x09FF) @@ -239,9 +240,12 @@ DEFINE_IS_IN_UNICODE_BLOCK(HalfwidthandFullwidthForms, 0xFF00, 0xFFEF) // DEFINE_IS_IN_UNICODE_BLOCK(Takri, 0x11680, 0x116CF) // DEFINE_IS_IN_UNICODE_BLOCK(Ahom, 0x11700, 0x1173F) // DEFINE_IS_IN_UNICODE_BLOCK(WarangCiti, 0x118A0, 0x118FF) +// DEFINE_IS_IN_UNICODE_BLOCK(ZanabazarSquare, 0x11A00, 0x11A4F) +// DEFINE_IS_IN_UNICODE_BLOCK(Soyombo, 0x11A50, 0x11AAF) // DEFINE_IS_IN_UNICODE_BLOCK(PauCinHau, 0x11AC0, 0x11AFF) // DEFINE_IS_IN_UNICODE_BLOCK(Bhaiksuki, 0x11C00, 0x11C6F) // DEFINE_IS_IN_UNICODE_BLOCK(Marchen, 0x11C70, 0x11CBF) +// DEFINE_IS_IN_UNICODE_BLOCK(MasaramGondi, 0x11D00, 0x11D5F) // DEFINE_IS_IN_UNICODE_BLOCK(Cuneiform, 0x12000, 0x123FF) // 
DEFINE_IS_IN_UNICODE_BLOCK(CuneiformNumbersandPunctuation, 0x12400, 0x1247F) // DEFINE_IS_IN_UNICODE_BLOCK(EarlyDynasticCuneiform, 0x12480, 0x1254F) @@ -256,6 +260,8 @@ DEFINE_IS_IN_UNICODE_BLOCK(HalfwidthandFullwidthForms, 0xFF00, 0xFFEF) // DEFINE_IS_IN_UNICODE_BLOCK(Tangut, 0x17000, 0x187FF) // DEFINE_IS_IN_UNICODE_BLOCK(TangutComponents, 0x18800, 0x18AFF) // DEFINE_IS_IN_UNICODE_BLOCK(KanaSupplement, 0x1B000, 0x1B0FF) +// DEFINE_IS_IN_UNICODE_BLOCK(KanaExtendedA, 0x1B100, 0x1B12F) +// DEFINE_IS_IN_UNICODE_BLOCK(Nushu, 0x1B170, 0x1B2FF) // DEFINE_IS_IN_UNICODE_BLOCK(Duployan, 0x1BC00, 0x1BC9F) // DEFINE_IS_IN_UNICODE_BLOCK(ShorthandFormatControls, 0x1BCA0, 0x1BCAF) // DEFINE_IS_IN_UNICODE_BLOCK(ByzantineMusicalSymbols, 0x1D000, 0x1D0FF) @@ -286,6 +292,7 @@ DEFINE_IS_IN_UNICODE_BLOCK(HalfwidthandFullwidthForms, 0xFF00, 0xFFEF) // DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionC, 0x2A700, 0x2B73F) // DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionD, 0x2B740, 0x2B81F) // DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionE, 0x2B820, 0x2CEAF) +// DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionF, 0x2CEB0, 0x2EBEF) // DEFINE_IS_IN_UNICODE_BLOCK(CJKCompatibilityIdeographsSupplement, 0x2F800, 0x2FA1F) // DEFINE_IS_IN_UNICODE_BLOCK(Tags, 0xE0000, 0xE007F) // DEFINE_IS_IN_UNICODE_BLOCK(VariationSelectorsSupplement, 0xE0100, 0xE01EF) @@ -375,11 +382,13 @@ bool allowsIdeographicBreaking(char16_t chr) { // return (isInTangut(chr) // || isInTangutComponents(chr) // || isInIdeographicSymbolsandPunctuation(chr) + // || isInNushu(chr) // || isInEnclosedIdeographicSupplement(chr) // || isInCJKUnifiedIdeographsExtensionB(chr) // || isInCJKUnifiedIdeographsExtensionC(chr) // || isInCJKUnifiedIdeographsExtensionD(chr) // || isInCJKUnifiedIdeographsExtensionE(chr) + // || isInCJKUnifiedIdeographsExtensionF(chr) // || isInCJKCompatibilityIdeographsSupplement(chr)); } @@ -393,7 +402,7 @@ bool allowsVerticalWritingMode(const std::u16string& string) { } // 
The following logic comes from -// . +// . // The data file denotes with “U” or “Tu” any codepoint that may be drawn // upright in vertical text but does not distinguish between upright and // “neutral” characters. @@ -457,6 +466,8 @@ bool hasUprightVerticalOrientation(char16_t chr) { // if (isInTangut(chr)) return true; // if (isInTangutComponents(chr)) return true; // if (isInKanaSupplement(chr)) return true; + // if (isInKanaExtendedA(chr)) return true; + // if (isInNushu(chr)) return true; // if (isInByzantineMusicalSymbols(chr)) return true; // if (isInMusicalSymbols(chr)) return true; // if (isInTaiXuanJingSymbols(chr)) return true; @@ -478,6 +489,7 @@ bool hasUprightVerticalOrientation(char16_t chr) { // if (isInCJKUnifiedIdeographsExtensionC(chr)) return true; // if (isInCJKUnifiedIdeographsExtensionD(chr)) return true; // if (isInCJKUnifiedIdeographsExtensionE(chr)) return true; + // if (isInCJKUnifiedIdeographsExtensionF(chr)) return true; // if (isInCJKCompatibilityIdeographsSupplement(chr)) return true; return false; -- cgit v1.2.1