From fc11d2e872bfcfbd07b4f81114e0336d10650b84 Mon Sep 17 00:00:00 2001 From: Chris Loer Date: Tue, 9 Oct 2018 13:00:35 -0700 Subject: [core, test] Add BiDi unit test - Port of arabic.test.js from mapbox-gl-rtl-text - Modify BiDi::getLine to remove trailing nulls in the event UBIDI_REMOVE_BIDI_CONTROLS causes the string to shorten. - Patch vendored ICU to avoid undefined undefined bit shifting behavior (triggered sanitizer failure) --- circle.yml | 2 +- cmake/test-files.txt | 1 + platform/default/bidi.cpp | 9 +++-- scripts/vendor/icu.sh | 18 ++++++++++ test/text/bidi.test.cpp | 90 +++++++++++++++++++++++++++++++++++++++++++++++ vendor/icu/src/ubidiimp.h | 4 +-- 6 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 test/text/bidi.test.cpp diff --git a/circle.yml b/circle.yml index 087418de3d..04ce610020 100644 --- a/circle.yml +++ b/circle.yml @@ -1066,7 +1066,7 @@ jobs: JOBS: 1 # https://github.com/mapbox/mapbox-gl-native/issues/9108 command: | xvfb-run --server-args="-screen 0 1024x768x24" \ - build/qt-linux-x86_64/Release/mbgl-test --gtest_filter=-*.Load --gtest_filter=-Memory.Vector + build/qt-linux-x86_64/Release/mbgl-test --gtest_filter=-*.Load --gtest_filter=-Memory.Vector --gtest_filter=-BiDi.* # ------------------------------------------------------------------------------ qt5-macos-debug: diff --git a/cmake/test-files.txt b/cmake/test-files.txt index a94004aa32..89283e2aea 100644 --- a/cmake/test-files.txt +++ b/cmake/test-files.txt @@ -109,6 +109,7 @@ test/src/mbgl/test/util.cpp test/src/mbgl/test/util.hpp # text +test/text/bidi.test.cpp test/text/cross_tile_symbol_index.test.cpp test/text/glyph_manager.test.cpp test/text/glyph_pbf.test.cpp diff --git a/platform/default/bidi.cpp b/platform/default/bidi.cpp index f972e43cc1..32a3dc23ef 100644 --- a/platform/default/bidi.cpp +++ b/platform/default/bidi.cpp @@ -220,8 +220,13 @@ std::u16string BiDi::getLine(std::size_t start, std::size_t end) { // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that // they don't show up on screen (some fonts have glyphs representing them) - ubidi_writeReordered(impl->bidiLine, mbgl::utf16char_cast(&outputText[0]), outputLength, - UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode); + int32_t finalLength = ubidi_writeReordered(impl->bidiLine, + mbgl::utf16char_cast(&outputText[0]), + outputLength, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, + &errorCode); + + outputText.resize(finalLength); // REMOVE_BIDI_CONTROLS may have shrunk the string if (U_FAILURE(errorCode)) { throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") + diff --git a/scripts/vendor/icu.sh b/scripts/vendor/icu.sh index 8ee9cd76fb..7a2c635080 100755 --- a/scripts/vendor/icu.sh +++ b/scripts/vendor/icu.sh @@ -49,6 +49,24 @@ for FILE in "${ALL[@]}"; do sed 's/^#include \"\(unicode\/[^\"]\{1,\}\)\"/#include <\1>/' "common/$FILE" > "$DIR/$FILE" done +# Apply patch from https://github.com/LibreOffice/core/blob/master/external/icu/icu4c-ubsan.patch.1 +# Shifting signed int to a number greater than can be represented is undefined behavior +patch -p0 << PATCH +--- src/ubidiimp.h ++++ src/ubidiimp.h +@@ -198,8 +198,8 @@ + /* in a Run, logicalStart will get this bit set if the run level is odd */ + #define INDEX_ODD_BIT (1UL<<31) + +-#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31)) ++#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31)) +-#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31)) ++#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31)) + #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) + + #define GET_INDEX(x) ((x)&~INDEX_ODD_BIT) +PATCH + rm -rf common file_list include src -name "*.h" -o -name "*.cpp" diff --git a/test/text/bidi.test.cpp b/test/text/bidi.test.cpp new file mode 100644 index 0000000000..7c0eb82884 --- /dev/null +++ b/test/text/bidi.test.cpp @@ -0,0 +1,90 @@ + +#include + +#include + +using namespace mbgl; + +/* + These tests mirror the unit tests in mapbox-gl-rtl-text's arabic.test.js + + The arabic text in results may appear to be backwards + This is because whatever you're viewing the text with is + applying the bidirectional algorithm a second time. + + Although they may look the same as input in your viewer, the + characters in the test results are "presentation forms" of + the characters. + + To closely inspect the inputs and outputs, use a binary/hex editor. +*/ + +TEST(BiDi, ArabicShaping) { + EXPECT_EQ(applyArabicShaping(u"اليَمَن‎‎"), u"ﺍﻟﻴﹷﻤﹷﻦ‎‎"); +} + +TEST(BiDi, Tashkeel) { + EXPECT_EQ(applyArabicShaping(u"سلام۳۹"), u"ﺳﻼﻡ۳۹"); +} + +TEST(BiDi, MixedShaping) { + EXPECT_EQ(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), + u"ﻣﻜﺘﺒﺔ ﺍﻹﺳﻜﻨﺪﺭﻳﺔ‎‎ Maktabat al-Iskandarīyah"); +} + +TEST(BiDi, ReverseArabic) { + BiDi bidi; + EXPECT_EQ(bidi.processText(applyArabicShaping(u"سلام۳۹"), {}), + std::vector{ u"۳۹ﻡﻼﺳ" }); +} + +TEST(BiDi, ReverseMixed) { + BiDi bidi; + EXPECT_EQ(bidi.processText(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), {}), + std::vector{ u" Maktabat al-Iskandarīyahﺔﻳﺭﺪﻨﻜﺳﻹﺍ ﺔﺒﺘﻜﻣ" }); +} + +TEST(BiDi, WithLineBreaks) { + BiDi bidi; + std::vector expected; + expected.emplace_back(u" ﺔﻳﺭﺪﻨﻜﺳﻹﺍ ﺔﺒﺘﻜﻣ"); + expected.emplace_back(u"Maktabat al-"); + expected.emplace_back(u"Iskandarīyah"); + EXPECT_EQ(bidi.processText(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), { 18, 30 }), + expected); +} + +TEST(BiDi, StyledText) { + // This test uses line breaks that intentionally split/reorder/interleave styled blocks + // that are contiguous in the input + BiDi bidi; + std::vector expected; + StyledText input( + applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), + std::vector{ 0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,4,5,5,5,5,6,6,6,6,6,6,6,6,6,6,7,7,7 } + ); + + expected.emplace_back(StyledText( + u"ﺔﺒﺘﻜﻣ", + std::vector{ 0,0,0,0,0 } + )); + EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size()); + expected.emplace_back(StyledText( + u" ‎‎ﺔﻳﺭﺪﻨﻜﺳﻹﺍ ", + std::vector{ 2,2,2,2,2,2,2,1,1,1,1,1,1 } + )); + EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size()); + expected.emplace_back(StyledText( + u"Maktabat al-", + std::vector{ 2,3,3,3,3,3,4,5,5,5,5,6 } + )); + EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size()); + expected.emplace_back(StyledText( + u"Iskandarīyah", + std::vector{ 6,6,6,6,6,6,6,6,6,7,7,7 } + )); + EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size()); + + EXPECT_EQ(bidi.processStyledText(input, { 5, 18, 30 }), expected); +} + diff --git a/vendor/icu/src/ubidiimp.h b/vendor/icu/src/ubidiimp.h index e070125781..16516614ab 100644 --- a/vendor/icu/src/ubidiimp.h +++ b/vendor/icu/src/ubidiimp.h @@ -198,8 +198,8 @@ typedef struct Run { /* in a Run, logicalStart will get this bit set if the run level is odd */ #define INDEX_ODD_BIT (1UL<<31) -#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31)) -#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31)) +#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31)) +#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31)) #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) #define GET_INDEX(x) ((x)&~INDEX_ODD_BIT) -- cgit v1.2.1