From e8ec25f6259aea3fea539029aa84b83f64543c44 Mon Sep 17 00:00:00 2001 From: Chris Loer Date: Sat, 7 Jan 2017 11:35:48 -0800 Subject: [core] Use ICU for Arabic text support - Supports bidirectional text layout - New "raggedness minimizing" line breaking algorithm --- CMakeLists.txt | 1 + cmake/core-files.cmake | 2 + cmake/core.cmake | 1 + platform/default/mbgl/storage/offline_download.cpp | 2 +- platform/default/string_stdlib.cpp | 2 +- src/mbgl/layout/merge_lines.cpp | 4 +- src/mbgl/layout/symbol_feature.hpp | 2 +- src/mbgl/layout/symbol_layout.cpp | 9 +- src/mbgl/layout/symbol_layout.hpp | 7 +- src/mbgl/text/bidi.cpp | 127 +++++++++ src/mbgl/text/bidi.hpp | 32 +++ src/mbgl/text/glyph.cpp | 2 +- src/mbgl/text/glyph.hpp | 6 +- src/mbgl/text/glyph_atlas.cpp | 4 +- src/mbgl/text/glyph_atlas.hpp | 2 +- src/mbgl/text/glyph_set.cpp | 316 +++++++++++++++------ src/mbgl/text/glyph_set.hpp | 33 ++- src/mbgl/util/utf.hpp | 15 +- test/util/merge_lines.test.cpp | 4 +- 19 files changed, 447 insertions(+), 124 deletions(-) create mode 100644 src/mbgl/text/bidi.cpp create mode 100644 src/mbgl/text/bidi.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e99f8031e..08dd089924 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ mason_use(earcut VERSION 0.12.1 HEADER_ONLY) mason_use(protozero VERSION 1.4.2 HEADER_ONLY) mason_use(pixelmatch VERSION 0.10.0 HEADER_ONLY) mason_use(geojson VERSION 0.3.2 HEADER_ONLY) +mason_use(icu VERSION 58.1) if(WITH_COVERAGE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") diff --git a/cmake/core-files.cmake b/cmake/core-files.cmake index 4d5cddbc49..2c2686a02d 100644 --- a/cmake/core-files.cmake +++ b/cmake/core-files.cmake @@ -372,6 +372,8 @@ set(MBGL_CORE_FILES src/mbgl/text/quads.hpp src/mbgl/text/shaping.cpp src/mbgl/text/shaping.hpp + src/mbgl/text/bidi.cpp + src/mbgl/text/bidi.hpp # tile src/mbgl/tile/geojson_tile.cpp diff --git a/cmake/core.cmake b/cmake/core.cmake index bc5e7eb718..9c1bf4a6f2 100644 --- a/cmake/core.cmake +++ b/cmake/core.cmake @@ -45,6 +45,7 @@ target_add_mason_package(mbgl-core PRIVATE supercluster) target_add_mason_package(mbgl-core PRIVATE kdbush) target_add_mason_package(mbgl-core PRIVATE earcut) target_add_mason_package(mbgl-core PRIVATE protozero) +target_add_mason_package(mbgl-core PRIVATE icu) mbgl_platform_core() diff --git a/platform/default/mbgl/storage/offline_download.cpp b/platform/default/mbgl/storage/offline_download.cpp index 9e2e11c86f..3edc75845c 100644 --- a/platform/default/mbgl/storage/offline_download.cpp +++ b/platform/default/mbgl/storage/offline_download.cpp @@ -184,7 +184,7 @@ void OfflineDownload::activateDownload() { if (!parser.glyphURL.empty()) { for (const auto& fontStack : parser.fontStacks()) { - for (uint32_t i = 0; i < GLYPH_RANGES_PER_FONT_STACK; i++) { + for (char16_t i = 0; i < GLYPH_RANGES_PER_FONT_STACK; i++) { queueResource(Resource::glyphs(parser.glyphURL, fontStack, getGlyphRange(i * GLYPHS_PER_GLYPH_RANGE))); } } diff --git a/platform/default/string_stdlib.cpp b/platform/default/string_stdlib.cpp index 90a75c1738..0e97fc54d5 100644 --- a/platform/default/string_stdlib.cpp +++ b/platform/default/string_stdlib.cpp @@ -1,10 +1,10 @@ #include -#include #define NU_WITH_TOUPPER #define NU_WITH_TOLOWER #define NU_WITH_UTF8_WRITER #include #include +#include namespace mbgl { namespace platform { diff --git a/src/mbgl/layout/merge_lines.cpp b/src/mbgl/layout/merge_lines.cpp index f4fdb82617..676cbc092d 100644 --- a/src/mbgl/layout/merge_lines.cpp +++ b/src/mbgl/layout/merge_lines.cpp @@ -47,10 +47,10 @@ enum class Side { }; size_t -getKey(const std::u32string& text, const GeometryCollection& geom, Side side) { +getKey(const std::u16string& text, const GeometryCollection& geom, Side side) { const GeometryCoordinate& coord = side == Side::Right ? geom[0].back() : geom[0].front(); - auto hash = std::hash()(text); + auto hash = std::hash()(text); boost::hash_combine(hash, coord.x); boost::hash_combine(hash, coord.y); return hash; diff --git a/src/mbgl/layout/symbol_feature.hpp b/src/mbgl/layout/symbol_feature.hpp index 99db4f9ac5..9e0eacaac5 100644 --- a/src/mbgl/layout/symbol_feature.hpp +++ b/src/mbgl/layout/symbol_feature.hpp @@ -10,7 +10,7 @@ namespace mbgl { class SymbolFeature { public: GeometryCollection geometry; - optional text; + optional text; optional icon; std::size_t index; }; diff --git a/src/mbgl/layout/symbol_layout.cpp b/src/mbgl/layout/symbol_layout.cpp index 07ba2bf4a3..94dc1d126e 100644 --- a/src/mbgl/layout/symbol_layout.cpp +++ b/src/mbgl/layout/symbol_layout.cpp @@ -90,10 +90,10 @@ SymbolLayout::SymbolLayout(std::string bucketName_, u8string = platform::lowercase(u8string); } - ft.text = util::utf8_to_utf32::convert(u8string); + ft.text = applyArabicShaping(util::utf8_to_utf16::convert(u8string)); // Loop through all characters of this text and collect unique codepoints. - for (char32_t chr : *ft.text) { + for (char16_t chr : *ft.text) { ranges.insert(getGlyphRange(chr)); } } @@ -201,7 +201,8 @@ void SymbolLayout::prepare(uintptr_t tileUID, /* verticalAlign */ verticalAlign, /* justify */ justify, /* spacing: ems */ layout.textLetterSpacing * 24, - /* translate */ Point(layout.textOffset.value[0], layout.textOffset.value[1])); + /* translate */ Point(layout.textOffset.value[0], layout.textOffset.value[1]), + /* bidirectional algorithm object */ bidi); // Add the glyphs we need for this label to the glyph atlas. if (shapedText) { @@ -309,7 +310,7 @@ void SymbolLayout::addFeature(const GeometryCollection &lines, } } -bool SymbolLayout::anchorIsTooClose(const std::u32string &text, const float repeatDistance, Anchor &anchor) { +bool SymbolLayout::anchorIsTooClose(const std::u16string &text, const float repeatDistance, Anchor &anchor) { if (compareText.find(text) == compareText.end()) { compareText.emplace(text, Anchors()); } else { diff --git a/src/mbgl/layout/symbol_layout.hpp b/src/mbgl/layout/symbol_layout.hpp index 54acf84aaf..c21398fabf 100644 --- a/src/mbgl/layout/symbol_layout.hpp +++ b/src/mbgl/layout/symbol_layout.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -64,8 +65,8 @@ private: const GlyphPositions& face, const size_t index); - bool anchorIsTooClose(const std::u32string& text, const float repeatDistance, Anchor&); - std::map> compareText; + bool anchorIsTooClose(const std::u16string& text, const float repeatDistance, Anchor&); + std::map> compareText; void addToDebugBuffers(CollisionTile&, SymbolBucket&); @@ -91,6 +92,8 @@ private: GlyphRangeSet ranges; std::vector symbolInstances; std::vector features; + + BiDi bidi; // Consider moving this up to geometry tile worker to reduce reinstantiation costs; use of BiDi/ubiditransform object must be constrained to one thread }; } // namespace mbgl diff --git a/src/mbgl/text/bidi.cpp b/src/mbgl/text/bidi.cpp new file mode 100644 index 0000000000..93a778b334 --- /dev/null +++ b/src/mbgl/text/bidi.cpp @@ -0,0 +1,127 @@ +#include + +#include +#include +#include + +namespace mbgl { + + class BiDiImpl { + public: + BiDiImpl() : bidiText(ubidi_open()), bidiLine(ubidi_open()) { + } + ~BiDiImpl() { + ubidi_close(bidiText); + ubidi_close(bidiLine); + } + + UBiDi* bidiText = nullptr; + UBiDi* bidiLine = nullptr; + }; + + BiDi::BiDi() : impl(std::make_unique()) {} + BiDi::~BiDi() = default; + + // Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining + // logical order. Output won't be intelligible until the bidirectional algorithm is applied + std::u16string applyArabicShaping(const std::u16string& input) { + UErrorCode errorCode = U_ZERO_ERROR; + + const int32_t outputLength = + u_shapeArabic(input.c_str(), static_cast(input.size()), NULL, 0, + (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | + (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), + &errorCode); + + // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR + errorCode = U_ZERO_ERROR; + + auto outputText = std::make_unique(outputLength); + u_shapeArabic(input.c_str(), static_cast(input.size()), outputText.get(), outputLength, + (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | + (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), + &errorCode); + + // If the algorithm fails for any reason, fall back to non-transformed text + if (U_FAILURE(errorCode)) + return input; + + return std::u16string(outputText.get(), outputLength); + } + + void BiDi::mergeParagraphLineBreaks(std::set& lineBreakPoints) { + int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText); + for (int32_t i = 0; i < paragraphCount; i++) { + UErrorCode errorCode = U_ZERO_ERROR; + int32_t paragraphEndIndex; + ubidi_getParagraphByIndex(impl->bidiText, i, NULL, ¶graphEndIndex, NULL, &errorCode); + + if (U_FAILURE(errorCode)) { + throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") + + u_errorName(errorCode)); + } + + lineBreakPoints.insert(static_cast(paragraphEndIndex)); + } + } + + std::vector BiDi::applyLineBreaking(std::set lineBreakPoints) { + // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all + // paragraph boundaries are included in the set of line break points. The calling code might not + // include the line break because it didn't need to wrap at that point, or because the text was + // separated with a more exotic code point such as (U+001C) + mergeParagraphLineBreaks(lineBreakPoints); + + std::vector transformedLines; + std::size_t start = 0; + for (std::size_t lineBreakPoint : lineBreakPoints) { + transformedLines.push_back(getLine(start, lineBreakPoint)); + start = lineBreakPoint; + } + + return transformedLines; + } + + std::vector BiDi::processText(const std::u16string& input, + std::set lineBreakPoints) { + UErrorCode errorCode = U_ZERO_ERROR; + + ubidi_setPara(impl->bidiText, input.c_str(), static_cast(input.size()), + UBIDI_DEFAULT_LTR, NULL, &errorCode); + + if (U_FAILURE(errorCode)) { + throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode)); + } + + return applyLineBreaking(lineBreakPoints); + } + + std::u16string BiDi::getLine(std::size_t start, std::size_t end) { + UErrorCode errorCode = U_ZERO_ERROR; + ubidi_setLine(impl->bidiText, static_cast(start), static_cast(end), impl->bidiLine, &errorCode); + + if (U_FAILURE(errorCode)) { + throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode)); + } + + // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve + // Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require + // ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi) + const int32_t outputLength = ubidi_getProcessedLength(impl->bidiLine); + auto outputText = std::make_unique(outputLength); + + // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses + // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that + // they don't show up on screen (some fonts have glyphs representing them) + ubidi_writeReordered(impl->bidiLine, outputText.get(), outputLength, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode); + + if (U_FAILURE(errorCode)) { + throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") + + u_errorName(errorCode)); + } + + return std::u16string(outputText.get(), outputLength); + } + +} // end namespace mbgl diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp new file mode 100644 index 0000000000..59d306489c --- /dev/null +++ b/src/mbgl/text/bidi.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace mbgl { + +class BiDi; +class BiDiImpl; + +std::u16string applyArabicShaping(const std::u16string&); + +class BiDi : private util::noncopyable { +public: + BiDi(); + ~BiDi(); + + std::vector processText(const std::u16string&, std::set); + +private: + void mergeParagraphLineBreaks(std::set&); + std::vector applyLineBreaking(std::set); + std::u16string getLine(std::size_t start, std::size_t end); + + std::unique_ptr impl; +}; + +} // end namespace mbgl diff --git a/src/mbgl/text/glyph.cpp b/src/mbgl/text/glyph.cpp index a877d7a799..29929b73e6 100644 --- a/src/mbgl/text/glyph.cpp +++ b/src/mbgl/text/glyph.cpp @@ -3,7 +3,7 @@ namespace mbgl { // Note: this only works for the BMP -GlyphRange getGlyphRange(char32_t glyph) { +GlyphRange getGlyphRange(char16_t glyph) { unsigned start = (glyph/256) * 256; unsigned end = (start + 255); if (start > 65280) start = 65280; diff --git a/src/mbgl/text/glyph.hpp b/src/mbgl/text/glyph.hpp index 975dc4ad23..a91e1f276a 100644 --- a/src/mbgl/text/glyph.hpp +++ b/src/mbgl/text/glyph.hpp @@ -11,7 +11,7 @@ namespace mbgl { // Note: this only works for the BMP -GlyphRange getGlyphRange(char32_t glyph); +GlyphRange getGlyphRange(char16_t glyph); struct GlyphMetrics { operator bool() const { @@ -55,10 +55,10 @@ public: class Shaping { public: explicit Shaping() : top(0), bottom(0), left(0), right(0) {} - explicit Shaping(float x, float y, std::u32string text_) + explicit Shaping(float x, float y, std::u16string text_) : text(std::move(text_)), top(y), bottom(y), left(x), right(x) {} std::vector positionedGlyphs; - std::u32string text; + std::u16string text; int32_t top; int32_t bottom; int32_t left; diff --git a/src/mbgl/text/glyph_atlas.cpp b/src/mbgl/text/glyph_atlas.cpp index 2f8c44db59..5377284db2 100644 --- a/src/mbgl/text/glyph_atlas.cpp +++ b/src/mbgl/text/glyph_atlas.cpp @@ -84,7 +84,7 @@ void GlyphAtlas::setObserver(GlyphAtlasObserver* observer_) { } void GlyphAtlas::addGlyphs(uintptr_t tileUID, - const std::u32string& text, + const std::u16string& text, const FontStack& fontStack, const GlyphSet& glyphSet, GlyphPositions& face) @@ -93,7 +93,7 @@ void GlyphAtlas::addGlyphs(uintptr_t tileUID, const std::map& sdfs = glyphSet.getSDFs(); - for (uint32_t chr : text) + for (char16_t chr : text) { auto sdf_it = sdfs.find(chr); if (sdf_it == sdfs.end()) { diff --git a/src/mbgl/text/glyph_atlas.hpp b/src/mbgl/text/glyph_atlas.hpp index 84875bdd78..2bfc137e53 100644 --- a/src/mbgl/text/glyph_atlas.hpp +++ b/src/mbgl/text/glyph_atlas.hpp @@ -53,7 +53,7 @@ public: void setObserver(GlyphAtlasObserver* observer); void addGlyphs(uintptr_t tileUID, - const std::u32string& text, + const std::u16string& text, const FontStack&, const GlyphSet&, GlyphPositions&); diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp index 0875a83850..a4b197944e 100644 --- a/src/mbgl/text/glyph_set.cpp +++ b/src/mbgl/text/glyph_set.cpp @@ -1,7 +1,10 @@ +#include #include #include -#include +#include + +#include #include namespace mbgl { @@ -26,44 +29,46 @@ void GlyphSet::insert(uint32_t id, SDFGlyph&& glyph) { } } -const std::map &GlyphSet::getSDFs() const { +const std::map& GlyphSet::getSDFs() const { return sdfs; } -const Shaping GlyphSet::getShaping(const std::u32string &string, const float maxWidth, - const float lineHeight, const float horizontalAlign, - const float verticalAlign, const float justify, - const float spacing, const Point &translate) const { - Shaping shaping(translate.x * 24, translate.y * 24, string); - - // the y offset *should* be part of the font metadata - const int32_t yOffset = -17; - - float x = 0; - const float y = yOffset; +const Shaping GlyphSet::getShaping(const std::u16string& logicalInput, + const float maxWidth, + const float lineHeight, + const float horizontalAlign, + const float verticalAlign, + const float justify, + const float spacing, + const Point& translate, + BiDi& bidi) const { - // Loop through all characters of this label and shape. - for (uint32_t chr : string) { - auto it = sdfs.find(chr); - if (it != sdfs.end()) { - shaping.positionedGlyphs.emplace_back(chr, x, y); - x += it->second.metrics.advance + spacing; - } - } + // The string stored in shaping.text is used for finding duplicates, but may end up quite + // different from the glyphs that get shown + Shaping shaping(translate.x * 24, translate.y * 24, logicalInput); - if (shaping.positionedGlyphs.empty()) - return shaping; + std::vector reorderedLines = + bidi.processText(logicalInput, + determineLineBreaks(logicalInput, spacing, maxWidth)); - lineWrap(shaping, lineHeight, maxWidth, horizontalAlign, verticalAlign, justify, translate); + shapeLines(shaping, reorderedLines, spacing, lineHeight, horizontalAlign, verticalAlign, + justify, translate); return shaping; } -void align(Shaping &shaping, const float justify, const float horizontalAlign, - const float verticalAlign, const uint32_t maxLineLength, const float lineHeight, - const uint32_t line, const Point &translate) { - const float shiftX = (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24/* one em */); - const float shiftY = (-verticalAlign * (line + 1) + 0.5) * lineHeight + ::round(translate.y * 24/* one em */); +void align(Shaping& shaping, + const float justify, + const float horizontalAlign, + const float verticalAlign, + const float maxLineLength, + const float lineHeight, + const std::size_t lineCount, + const Point& translate) { + const float shiftX = + (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */); + const float shiftY = + (-verticalAlign * lineCount + 0.5) * lineHeight + ::round(translate.y * 24 /* one em */); for (auto& glyph : shaping.positionedGlyphs) { glyph.x += shiftX; @@ -71,94 +76,225 @@ void align(Shaping &shaping, const float justify, const float horizontalAlign, } } -void justifyLine(std::vector &positionedGlyphs, const std::map &sdfs, uint32_t start, - uint32_t end, float justify) { - PositionedGlyph &glyph = positionedGlyphs[end]; +// justify left = 0, right = 1, center = .5 +void justifyLine(std::vector& positionedGlyphs, + const std::map& sdfs, + std::size_t start, + std::size_t end, + float justify) { + if (!justify) { + return; + } + + PositionedGlyph& glyph = positionedGlyphs[end]; auto it = sdfs.find(glyph.glyph); if (it != sdfs.end()) { const uint32_t lastAdvance = it->second.metrics.advance; const float lineIndent = float(glyph.x + lastAdvance) * justify; - for (uint32_t j = start; j <= end; j++) { + for (std::size_t j = start; j <= end; j++) { positionedGlyphs[j].x -= lineIndent; } } } -void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, const float maxWidth, - const float horizontalAlign, const float verticalAlign, - const float justify, const Point &translate) const { - uint32_t lastSafeBreak = 0; +float GlyphSet::determineAverageLineWidth(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const { + float totalWidth = 0; - uint32_t lengthBeforeCurrentLine = 0; - uint32_t lineStartIndex = 0; - uint32_t line = 0; + for (char16_t chr : logicalInput) { + auto it = sdfs.find(chr); + if (it != sdfs.end()) { + totalWidth += it->second.metrics.advance + spacing; + } + } - uint32_t maxLineLength = 0; + int32_t targetLineCount = std::fmax(1, std::ceil(totalWidth / maxWidth)); + return totalWidth / targetLineCount; +} + +float calculateBadness(const float lineWidth, const float targetWidth, const float penalty, const bool isLastBreak) { + const float raggedness = std::pow(lineWidth - targetWidth, 2); + if (isLastBreak) { + // Favor finals lines shorter than average over longer than average + if (lineWidth < targetWidth) { + return raggedness / 2; + } else { + return raggedness * 2; + } + } + if (penalty < 0) { + return raggedness - std::pow(penalty, 2); + } + return raggedness + std::pow(penalty, 2); +} + +float calculatePenalty(char16_t codePoint, char16_t nextCodePoint) { + float penalty = 0; + // Force break on newline + if (codePoint == 0x0a) { + penalty -= 10000; + } + // Penalize open parenthesis at end of line + if (codePoint == 0x28 || codePoint == 0xff08) { + penalty += 50; + } - std::vector &positionedGlyphs = shaping.positionedGlyphs; + // Penalize close parenthesis at beginning of line + if (nextCodePoint == 0x29 || nextCodePoint == 0xff09) { + penalty += 50; + } + + return penalty; +} + +struct PotentialBreak { + PotentialBreak(const std::size_t p_index, const float p_x, const PotentialBreak* p_priorBreak, const float p_badness) + : index(p_index), x(p_x), priorBreak(p_priorBreak), badness(p_badness) + {} + + const std::size_t index; + const float x; + const PotentialBreak* priorBreak; + const float badness; +}; + + +PotentialBreak evaluateBreak(const std::size_t breakIndex, const float breakX, const float targetWidth, const std::list& potentialBreaks, const float penalty, const bool isLastBreak) { + // We could skip evaluating breaks where the line length (breakX - priorBreak.x) > maxWidth + // ...but in fact we allow lines longer than maxWidth (if there's no break points) + // ...and when targetWidth and maxWidth are close, strictly enforcing maxWidth can give + // more lopsided results. + + const PotentialBreak* bestPriorBreak = nullptr; + float bestBreakBadness = calculateBadness(breakX, targetWidth, penalty, isLastBreak); + for (const auto& potentialBreak : potentialBreaks) { + const float lineWidth = breakX - potentialBreak.x; + float breakBadness = + calculateBadness(lineWidth, targetWidth, penalty, isLastBreak) + potentialBreak.badness; + if (breakBadness <= bestBreakBadness) { + bestPriorBreak = &potentialBreak; + bestBreakBadness = breakBadness; + } + } + + return PotentialBreak(breakIndex, breakX, bestPriorBreak, bestBreakBadness); +} + +std::set leastBadBreaks(const PotentialBreak& lastLineBreak) { + std::set leastBadBreaks = { lastLineBreak.index }; + const PotentialBreak* priorBreak = lastLineBreak.priorBreak; + while (priorBreak) { + leastBadBreaks.insert(priorBreak->index); + priorBreak = priorBreak->priorBreak; + } + return leastBadBreaks; +} + + +// We determine line breaks based on shaped text in logical order. Working in visual order would be +// more intuitive, but we can't do that because the visual order may be changed by line breaks! +std::set GlyphSet::determineLineBreaks(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const { + if (!maxWidth) { + return {}; + } + + if (logicalInput.empty()) { + return {}; + } + + const float targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth); + + std::list potentialBreaks; + float currentX = 0; + + for (std::size_t i = 0; i < logicalInput.size(); i++) { + const char16_t codePoint = logicalInput[i]; + auto it = sdfs.find(codePoint); + if (it != sdfs.end() && !boost::algorithm::is_any_of(u" \t\n\v\f\r")(codePoint)) { + currentX += it->second.metrics.advance + spacing; + } + + if (i >= logicalInput.size() - 1) + continue; + + // Spaces, plus word-breaking punctuation that often appears without surrounding spaces. + if (codePoint == 0x20 /* space */ + || codePoint == 0x26 /* ampersand */ + || codePoint == 0x2b /* plus sign */ + || codePoint == 0x2d /* hyphen-minus */ + || codePoint == 0x2f /* solidus */ + || codePoint == 0xad /* soft hyphen */ + || codePoint == 0xb7 /* middle dot */ + || codePoint == 0x200b /* zero-width space */ + || codePoint == 0x2010 /* hyphen */ + || codePoint == 0x2013 /* en dash */) { + potentialBreaks.push_back(evaluateBreak(i+1, currentX, targetWidth, potentialBreaks, + calculatePenalty(codePoint, logicalInput[i+1]), + false)); + } + } - if (maxWidth) { - for (uint32_t i = 0; i < positionedGlyphs.size(); i++) { - PositionedGlyph &shape = positionedGlyphs[i]; + return leastBadBreaks(evaluateBreak(logicalInput.size(), currentX, targetWidth, potentialBreaks, 0, true)); +} - shape.x -= lengthBeforeCurrentLine; - shape.y += lineHeight * line; +void GlyphSet::shapeLines(Shaping& shaping, + const std::vector& lines, + const float spacing, + const float lineHeight, + const float horizontalAlign, + const float verticalAlign, + const float justify, + const Point& translate) const { - if (shape.x > maxWidth && lastSafeBreak > 0) { + // the y offset *should* be part of the font metadata + const int32_t yOffset = -17; - uint32_t lineLength = positionedGlyphs[lastSafeBreak + 1].x; - maxLineLength = util::max(lineLength, maxLineLength); + float x = 0; + float y = yOffset; - for (uint32_t k = lastSafeBreak + 1; k <= i; k++) { - positionedGlyphs[k].y += lineHeight; - positionedGlyphs[k].x -= lineLength; - } + float maxLineLength = 0; - if (justify) { - // Collapse invisible characters. - uint32_t breakGlyph = positionedGlyphs[lastSafeBreak].glyph; - uint32_t lineEnd = lastSafeBreak; - if (breakGlyph == 0x20 /* space */ - || breakGlyph == 0x200b /* zero-width space */) { - lineEnd--; - } + for (std::u16string line : lines) { + // Collapse whitespace so it doesn't throw off justification + boost::algorithm::trim_if(line, boost::algorithm::is_any_of(u" \t\n\v\f\r")); - justifyLine(positionedGlyphs, sdfs, lineStartIndex, lineEnd, justify); - } + if (line.empty()) { + y += lineHeight; // Still need a line feed after empty line + continue; + } - lineStartIndex = lastSafeBreak + 1; - lastSafeBreak = 0; - lengthBeforeCurrentLine += lineLength; - line++; + std::size_t lineStartIndex = shaping.positionedGlyphs.size(); + for (char16_t chr : line) { + auto it = sdfs.find(chr); + if (it == sdfs.end()) { + continue; } - // Spaces, plus word-breaking punctuation that often appears without surrounding spaces. - if (shape.glyph == 0x20 /* space */ - || shape.glyph == 0x26 /* ampersand */ - || shape.glyph == 0x2b /* plus sign */ - || shape.glyph == 0x2d /* hyphen-minus */ - || shape.glyph == 0x2f /* solidus */ - || shape.glyph == 0xad /* soft hyphen */ - || shape.glyph == 0xb7 /* middle dot */ - || shape.glyph == 0x200b /* zero-width space */ - || shape.glyph == 0x2010 /* hyphen */ - || shape.glyph == 0x2013 /* en dash */) { - lastSafeBreak = i; - } + const SDFGlyph& glyph = it->second; + shaping.positionedGlyphs.emplace_back(chr, x, y); + x += glyph.metrics.advance + spacing; } - } - const PositionedGlyph& lastPositionedGlyph = positionedGlyphs.back(); - const auto lastGlyphIt = sdfs.find(lastPositionedGlyph.glyph); - assert(lastGlyphIt != sdfs.end()); - const uint32_t lastLineLength = lastPositionedGlyph.x + lastGlyphIt->second.metrics.advance; - maxLineLength = std::max(maxLineLength, lastLineLength); + // Only justify if we placed at least one glyph + if (shaping.positionedGlyphs.size() != lineStartIndex) { + float lineLength = x - spacing; // Don't count trailing spacing + maxLineLength = util::max(lineLength, maxLineLength); + + justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex, + shaping.positionedGlyphs.size() - 1, justify); + } - const uint32_t height = (line + 1) * lineHeight; + x = 0; + y += lineHeight; + } - justifyLine(positionedGlyphs, sdfs, lineStartIndex, uint32_t(positionedGlyphs.size()) - 1, justify); - align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, line, translate); + align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, + lines.size(), translate); + const uint32_t height = lines.size() * lineHeight; // Calculate the bounding box shaping.top += -verticalAlign * height; diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp index 37ffdb070a..3037cefca0 100644 --- a/src/mbgl/text/glyph_set.hpp +++ b/src/mbgl/text/glyph_set.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -8,14 +9,34 @@ namespace mbgl { class GlyphSet { public: void insert(uint32_t id, SDFGlyph&&); - const std::map &getSDFs() const; - const Shaping getShaping(const std::u32string &string, float maxWidth, float lineHeight, - float horizontalAlign, float verticalAlign, float justify, - float spacing, const Point &translate) const; - void lineWrap(Shaping &shaping, float lineHeight, float maxWidth, float horizontalAlign, - float verticalAlign, float justify, const Point &translate) const; + const std::map& getSDFs() const; + const Shaping getShaping(const std::u16string& string, + float maxWidth, + float lineHeight, + float horizontalAlign, + float verticalAlign, + float justify, + float spacing, + const Point& translate, + BiDi& bidi) const; private: + float determineAverageLineWidth(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const; + std::set determineLineBreaks(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const; + + void shapeLines(Shaping& shaping, + const std::vector& lines, + const float spacing, + float lineHeight, + float horizontalAlign, + float verticalAlign, + float justify, + const Point& translate) const; + std::map sdfs; }; diff --git a/src/mbgl/util/utf.hpp b/src/mbgl/util/utf.hpp index 560ca3ba7f..81330cfc83 100644 --- a/src/mbgl/util/utf.hpp +++ b/src/mbgl/util/utf.hpp @@ -2,18 +2,17 @@ #include -#include +#include +#include namespace mbgl { namespace util { -class utf8_to_utf32 { - public: - static std::u32string convert(std::string const& utf8) - { - boost::u8_to_u32_iterator begin(utf8.begin()); - boost::u8_to_u32_iterator end(utf8.end()); - return std::u32string(begin,end); +class utf8_to_utf16 { +public: + static std::u16string convert(std::string const& utf8) { + std::wstring_convert, char16_t> converter; + return converter.from_bytes(utf8); } }; diff --git a/test/util/merge_lines.test.cpp b/test/util/merge_lines.test.cpp index db81d8b209..30cd1af068 100644 --- a/test/util/merge_lines.test.cpp +++ b/test/util/merge_lines.test.cpp @@ -3,8 +3,8 @@ #include #include -const std::u32string aaa = U"a"; -const std::u32string bbb = U"b"; +const std::u16string aaa = u"a"; +const std::u16string bbb = u"b"; TEST(MergeLines, SameText) { // merges lines with the same text -- cgit v1.2.1