From bceeba29a3fa85105c21718ed0be8704508ab585 Mon Sep 17 00:00:00 2001 From: Chris Loer Date: Thu, 17 Nov 2016 14:12:28 -0800 Subject: [core] Add ICU package for Bidirectional text support and Arabic text shaping. Apply bidi and shaping in symbol_layout. Add utility functions for converting to and from UTF-16. --- CMakeLists.txt | 1 + cmake/core-files.cmake | 2 ++ cmake/core.cmake | 1 + package.json | 2 +- src/mbgl/layout/symbol_layout.cpp | 4 ++- src/mbgl/layout/symbol_layout.hpp | 3 +++ src/mbgl/text/bidi.cpp | 53 +++++++++++++++++++++++++++++++++++++++ src/mbgl/text/bidi.hpp | 22 ++++++++++++++++ src/mbgl/util/utf.hpp | 8 +++--- 9 files changed, 89 insertions(+), 7 deletions(-) create mode 100644 src/mbgl/text/bidi.cpp create mode 100644 src/mbgl/text/bidi.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 064710780f..6358976e6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ mason_use(earcut VERSION 0.12.1 HEADER_ONLY) mason_use(protozero VERSION 1.4.2 HEADER_ONLY) mason_use(pixelmatch VERSION 0.10.0 HEADER_ONLY) mason_use(geojson VERSION 0.3.2 HEADER_ONLY) +mason_use(icu VERSION 58.1) if(WITH_COVERAGE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") diff --git a/cmake/core-files.cmake b/cmake/core-files.cmake index 0bde8505e8..bbfec6a4c2 100644 --- a/cmake/core-files.cmake +++ b/cmake/core-files.cmake @@ -360,6 +360,8 @@ set(MBGL_CORE_FILES src/mbgl/text/quads.hpp src/mbgl/text/shaping.cpp src/mbgl/text/shaping.hpp + src/mbgl/text/bidi.cpp + src/mbgl/text/bidi.hpp # tile src/mbgl/tile/geojson_tile.cpp diff --git a/cmake/core.cmake b/cmake/core.cmake index bc5e7eb718..9c1bf4a6f2 100644 --- a/cmake/core.cmake +++ b/cmake/core.cmake @@ -45,6 +45,7 @@ target_add_mason_package(mbgl-core PRIVATE supercluster) target_add_mason_package(mbgl-core PRIVATE kdbush) target_add_mason_package(mbgl-core PRIVATE earcut) target_add_mason_package(mbgl-core PRIVATE protozero) +target_add_mason_package(mbgl-core PRIVATE icu) mbgl_platform_core() diff 
--git a/package.json b/package.json index 598e1fef44..8b8e57460b 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,7 @@ "lodash": "^4.16.4", "mapbox-gl-shaders": "mapbox/mapbox-gl-shaders#597115a1e1bd982944b068f8accde34eada74fc2", "mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#7f62a4fc9f21e619824d68abbc4b03cbc1685572", - "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#87192085b3c1ebe668524511bfba28381e5eb627", + "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#c32d0c5ac80e3b7393bc17b8944e64fa5cffd90a", "mkdirp": "^0.5.1", "node-cmake": "^1.2.1", "request": "^2.72.0", diff --git a/src/mbgl/layout/symbol_layout.cpp b/src/mbgl/layout/symbol_layout.cpp index 7f8e8d5f83..00395fafc2 100644 --- a/src/mbgl/layout/symbol_layout.cpp +++ b/src/mbgl/layout/symbol_layout.cpp @@ -20,6 +20,8 @@ #include #include +#include + namespace mbgl { using namespace style; @@ -90,7 +92,7 @@ SymbolLayout::SymbolLayout(std::string bucketName_, u8string = platform::lowercase(u8string); } - ft.text = util::utf8_to_utf16::convert( u8string ); + ft.text = bidi.bidiTransform(util::utf8_to_utf16::convert(u8string)); // Loop through all characters of this text and collect unique codepoints. 
for (char16_t chr : *ft.text) { diff --git a/src/mbgl/layout/symbol_layout.hpp b/src/mbgl/layout/symbol_layout.hpp index 9a580900dd..c21398fabf 100644 --- a/src/mbgl/layout/symbol_layout.hpp +++ b/src/mbgl/layout/symbol_layout.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -91,6 +92,8 @@ private: GlyphRangeSet ranges; std::vector symbolInstances; std::vector features; + + BiDi bidi; // Consider moving this up to geometry tile worker to reduce reinstantiation costs; use of BiDi/ubiditransform object must be constrained to one thread }; } // namespace mbgl diff --git a/src/mbgl/text/bidi.cpp b/src/mbgl/text/bidi.cpp new file mode 100644 index 0000000000..2b6967110c --- /dev/null +++ b/src/mbgl/text/bidi.cpp @@ -0,0 +1,53 @@ +#include + +#include +#include +#include + +namespace mbgl { + +BiDi::BiDi() { + UErrorCode errorCode = U_ZERO_ERROR; + transform = ubiditransform_open(&errorCode); // Only error is failure to allocate memory, in + // that case ubiditransform_transform would fall back to + // creating transform object on the fly +} + +BiDi::~BiDi() { + if (transform) + ubiditransform_close(transform); +} + +std::u16string BiDi::bidiTransform(const std::u16string& input) { + UErrorCode errorCode = U_ZERO_ERROR; + + std::unique_ptr outputText = + std::make_unique(input.size() * 2); // Maximum output of ubiditransform_transform is twice + // the size of input according to + // ubiditransform.h + uint32_t outputLength = ubiditransform_transform( + transform, input.c_str(), static_cast(input.size()), outputText.get(), + static_cast(input.size()) * 2, + UBIDI_DEFAULT_LTR, // Assume input is LTR unless strong RTL characters are found + UBIDI_LOGICAL, // Input is in logical order + UBIDI_LTR, // Output is in "visual LTR" order + UBIDI_VISUAL, // '' + UBIDI_MIRRORING_ON, // Use mirroring lookups for things like parentheses that need mirroring + // in RTL text + U_SHAPE_LETTERS_SHAPE, // Add options here for handling numbers in bidirectional text + 
&errorCode); + + // If the algorithm fails for any reason, fall back to non-transformed text + if (U_FAILURE(errorCode)) + return input; + + return std::u16string(outputText.get(), outputLength); +} + +bool BiDi::baseDirectionRightToLeft(const std::u16string& input) { + // This just looks for the first character with a strong direction property, it does not perform + // the BiDi algorithm + return ubidi_getBaseDirection(input.c_str(), static_cast(input.size())) == UBIDI_RTL; +} + +} // end namespace mbgl diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp new file mode 100644 index 0000000000..030ac88ce2 --- /dev/null +++ b/src/mbgl/text/bidi.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include +#include + +namespace mbgl { + +class BiDi : private util::noncopyable { +public: + BiDi(); + ~BiDi(); + + std::u16string bidiTransform(const std::u16string&); + bool baseDirectionRightToLeft(const std::u16string&); + +private: + UBiDiTransform* transform; +}; + +} // end namespace mbgl diff --git a/src/mbgl/util/utf.hpp b/src/mbgl/util/utf.hpp index 386e56bef8..81330cfc83 100644 --- a/src/mbgl/util/utf.hpp +++ b/src/mbgl/util/utf.hpp @@ -10,12 +10,10 @@ namespace util { class utf8_to_utf16 { public: - static std::u16string convert(std::string const& utf8) - { - std::wstring_convert,char16_t> converter; - return converter.from_bytes( utf8 ); + static std::u16string convert(std::string const& utf8) { + std::wstring_convert, char16_t> converter; + return converter.from_bytes(utf8); } - }; } // namespace util -- cgit v1.2.1