summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--circle.yml2
-rw-r--r--cmake/test-files.txt1
-rw-r--r--platform/default/bidi.cpp9
-rwxr-xr-xscripts/vendor/icu.sh18
-rw-r--r--test/text/bidi.test.cpp90
-rw-r--r--vendor/icu/src/ubidiimp.h4
6 files changed, 119 insertions, 5 deletions
diff --git a/circle.yml b/circle.yml
index 087418de3d..04ce610020 100644
--- a/circle.yml
+++ b/circle.yml
@@ -1066,7 +1066,7 @@ jobs:
JOBS: 1 # https://github.com/mapbox/mapbox-gl-native/issues/9108
command: |
xvfb-run --server-args="-screen 0 1024x768x24" \
- build/qt-linux-x86_64/Release/mbgl-test --gtest_filter=-*.Load --gtest_filter=-Memory.Vector
+ build/qt-linux-x86_64/Release/mbgl-test --gtest_filter=-*.Load --gtest_filter=-Memory.Vector --gtest_filter=-BiDi.*
# ------------------------------------------------------------------------------
qt5-macos-debug:
diff --git a/cmake/test-files.txt b/cmake/test-files.txt
index a94004aa32..89283e2aea 100644
--- a/cmake/test-files.txt
+++ b/cmake/test-files.txt
@@ -109,6 +109,7 @@ test/src/mbgl/test/util.cpp
test/src/mbgl/test/util.hpp
# text
+test/text/bidi.test.cpp
test/text/cross_tile_symbol_index.test.cpp
test/text/glyph_manager.test.cpp
test/text/glyph_pbf.test.cpp
diff --git a/platform/default/bidi.cpp b/platform/default/bidi.cpp
index f972e43cc1..32a3dc23ef 100644
--- a/platform/default/bidi.cpp
+++ b/platform/default/bidi.cpp
@@ -220,8 +220,13 @@ std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
// UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
// UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
// they don't show up on screen (some fonts have glyphs representing them)
- ubidi_writeReordered(impl->bidiLine, mbgl::utf16char_cast<UChar*>(&outputText[0]), outputLength,
- UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode);
+ int32_t finalLength = ubidi_writeReordered(impl->bidiLine,
+ mbgl::utf16char_cast<UChar*>(&outputText[0]),
+ outputLength,
+ UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS,
+ &errorCode);
+
+ outputText.resize(finalLength); // REMOVE_BIDI_CONTROLS may have shrunk the string
if (U_FAILURE(errorCode)) {
throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") +
diff --git a/scripts/vendor/icu.sh b/scripts/vendor/icu.sh
index 8ee9cd76fb..7a2c635080 100755
--- a/scripts/vendor/icu.sh
+++ b/scripts/vendor/icu.sh
@@ -49,6 +49,24 @@ for FILE in "${ALL[@]}"; do
sed 's/^#include \"\(unicode\/[^\"]\{1,\}\)\"/#include <\1>/' "common/$FILE" > "$DIR/$FILE"
done
+# Apply patch from https://github.com/LibreOffice/core/blob/master/external/icu/icu4c-ubsan.patch.1
+# Shifting signed int to a number greater than can be represented is undefined behavior
+patch -p0 << PATCH
+--- src/ubidiimp.h
++++ src/ubidiimp.h
+@@ -198,8 +198,8 @@
+ /* in a Run, logicalStart will get this bit set if the run level is odd */
+ #define INDEX_ODD_BIT (1UL<<31)
+
+-#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
++#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31))
+-#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31))
++#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31))
+ #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
+
+ #define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
+PATCH
+
rm -rf common
file_list include src -name "*.h" -o -name "*.cpp"
diff --git a/test/text/bidi.test.cpp b/test/text/bidi.test.cpp
new file mode 100644
index 0000000000..7c0eb82884
--- /dev/null
+++ b/test/text/bidi.test.cpp
@@ -0,0 +1,90 @@
+
+#include <mbgl/test/util.hpp>
+
+#include <mbgl/text/bidi.hpp>
+
+using namespace mbgl;
+
+/*
+ These tests mirror the unit tests in mapbox-gl-rtl-text's arabic.test.js
+
+ The arabic text in results may appear to be backwards
+ This is because whatever you're viewing the text with is
+ applying the bidirectional algorithm a second time.
+
+ Although they may look the same as input in your viewer, the
+ characters in the test results are "presentation forms" of
+ the characters.
+
+ To closely inspect the inputs and outputs, use a binary/hex editor.
+*/
+
+TEST(BiDi, ArabicShaping) {
+ EXPECT_EQ(applyArabicShaping(u"اليَمَن‎‎"), u"ﺍﻟﻴﹷﻤﹷﻦ‎‎");
+}
+
+TEST(BiDi, Tashkeel) {
+ EXPECT_EQ(applyArabicShaping(u"سلام۳۹"), u"ﺳﻼﻡ۳۹");
+}
+
+TEST(BiDi, MixedShaping) {
+ EXPECT_EQ(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"),
+ u"ﻣﻜﺘﺒﺔ ﺍﻹﺳﻜﻨﺪﺭﻳﺔ‎‎ Maktabat al-Iskandarīyah");
+}
+
+TEST(BiDi, ReverseArabic) {
+ BiDi bidi;
+ EXPECT_EQ(bidi.processText(applyArabicShaping(u"سلام۳۹"), {}),
+ std::vector<std::u16string>{ u"۳۹ﻡﻼﺳ" });
+}
+
+TEST(BiDi, ReverseMixed) {
+ BiDi bidi;
+ EXPECT_EQ(bidi.processText(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), {}),
+ std::vector<std::u16string>{ u" Maktabat al-Iskandarīyahﺔﻳﺭﺪﻨﻜﺳﻹﺍ ﺔﺒﺘﻜﻣ" });
+}
+
+TEST(BiDi, WithLineBreaks) {
+ BiDi bidi;
+ std::vector<std::u16string> expected;
+ expected.emplace_back(u" ﺔﻳﺭﺪﻨﻜﺳﻹﺍ ﺔﺒﺘﻜﻣ");
+ expected.emplace_back(u"Maktabat al-");
+ expected.emplace_back(u"Iskandarīyah");
+ EXPECT_EQ(bidi.processText(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), { 18, 30 }),
+ expected);
+}
+
+TEST(BiDi, StyledText) {
+ // This test uses line breaks that intentionally split/reorder/interleave styled blocks
+ // that are contiguous in the input
+ BiDi bidi;
+ std::vector<StyledText> expected;
+ StyledText input(
+ applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"),
+ std::vector<uint8_t>{ 0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,4,5,5,5,5,6,6,6,6,6,6,6,6,6,6,7,7,7 }
+ );
+
+ expected.emplace_back(StyledText(
+ u"ﺔﺒﺘﻜﻣ",
+ std::vector<uint8_t>{ 0,0,0,0,0 }
+ ));
+ EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());
+ expected.emplace_back(StyledText(
+ u" ‎‎ﺔﻳﺭﺪﻨﻜﺳﻹﺍ ",
+ std::vector<uint8_t>{ 2,2,2,2,2,2,2,1,1,1,1,1,1 }
+ ));
+ EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());
+ expected.emplace_back(StyledText(
+ u"Maktabat al-",
+ std::vector<uint8_t>{ 2,3,3,3,3,3,4,5,5,5,5,6 }
+ ));
+ EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());
+ expected.emplace_back(StyledText(
+ u"Iskandarīyah",
+ std::vector<uint8_t>{ 6,6,6,6,6,6,6,6,6,7,7,7 }
+ ));
+ EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());
+
+ EXPECT_EQ(bidi.processStyledText(input, { 5, 18, 30 }), expected);
+}
+
diff --git a/vendor/icu/src/ubidiimp.h b/vendor/icu/src/ubidiimp.h
index e070125781..16516614ab 100644
--- a/vendor/icu/src/ubidiimp.h
+++ b/vendor/icu/src/ubidiimp.h
@@ -198,8 +198,8 @@ typedef struct Run {
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)
-#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
-#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31))
+#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31))
+#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31))
#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)