From a88357d0e4013ec62d7d20e7a11ecd7d2b1e812f Mon Sep 17 00:00:00 2001 From: Chris Loer Date: Wed, 21 Dec 2016 13:40:57 -0800 Subject: Port raggedness-minimizing line breaking from gl-js. --- package.json | 2 +- platform/default/bidi.cpp | 24 ++++---- platform/qt/src/bidi.cpp | 19 +++--- src/mbgl/text/bidi.hpp | 8 +-- src/mbgl/text/glyph_set.cpp | 142 +++++++++++++++++++++++++++++++------------- src/mbgl/text/glyph_set.hpp | 4 +- src/mbgl/util/i18n.cpp | 2 + 7 files changed, 130 insertions(+), 71 deletions(-) diff --git a/package.json b/package.json index f99653d184..02f1996ddb 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,7 @@ "lodash": "^4.16.4", "mapbox-gl": "mapbox/mapbox-gl-js#ab836206d415ca3a74257a3066d11a54ab2838cb", "mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#49e8b407bdbbe6f7c92dbcb56d3d51f425fc2653", - "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1ada02a706fd124fc3441fd3a2b3cda67960ff5", + "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1208969a7855e524bd174bddf4e46f0bb697cb6", "mkdirp": "^0.5.1", "node-cmake": "^1.2.1", "pixelmatch": "^4.0.2", diff --git a/platform/default/bidi.cpp b/platform/default/bidi.cpp index 9de6328a05..25b4dbe3a7 100644 --- a/platform/default/bidi.cpp +++ b/platform/default/bidi.cpp @@ -19,12 +19,8 @@ public: UBiDi* bidiLine = nullptr; }; -BiDi::BiDi() : impl(std::make_unique()) -{ -} - -BiDi::~BiDi() { -} +BiDi::BiDi() : impl(std::make_unique()) {} +BiDi::~BiDi() = default; // Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining // logical order. Output won't be intelligible until the bidirectional algorithm is applied @@ -53,7 +49,7 @@ std::u16string applyArabicShaping(const std::u16string& input) { return std::u16string(outputText.get(), outputLength); } -void BiDi::mergeParagraphLineBreaks(std::set& lineBreakPoints) { +void BiDi::mergeParagraphLineBreaks(std::set& lineBreakPoints) { int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText); for (int32_t i = 0; i < paragraphCount; i++) { UErrorCode errorCode = U_ZERO_ERROR; @@ -65,11 +61,11 @@ void BiDi::mergeParagraphLineBreaks(std::set& lineBreakPoints) { u_errorName(errorCode)); } - lineBreakPoints.insert(paragraphEndIndex); + lineBreakPoints.insert(static_cast(paragraphEndIndex)); } } -std::vector BiDi::applyLineBreaking(std::set lineBreakPoints) { +std::vector BiDi::applyLineBreaking(std::set lineBreakPoints) { // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all // paragraph boundaries are included in the set of line break points. The calling code might not // include the line break because it didn't need to wrap at that point, or because the text was @@ -77,8 +73,8 @@ std::vector BiDi::applyLineBreaking(std::set lineBreakP mergeParagraphLineBreaks(lineBreakPoints); std::vector transformedLines; - int32_t start = 0; - for (int32_t lineBreakPoint : lineBreakPoints) { + std::size_t start = 0; + for (std::size_t lineBreakPoint : lineBreakPoints) { transformedLines.push_back(getLine(start, lineBreakPoint)); start = lineBreakPoint; } @@ -87,7 +83,7 @@ std::vector BiDi::applyLineBreaking(std::set lineBreakP } std::vector BiDi::processText(const std::u16string& input, - std::set lineBreakPoints) { + std::set lineBreakPoints) { UErrorCode errorCode = U_ZERO_ERROR; ubidi_setPara(impl->bidiText, input.c_str(), static_cast(input.size()), @@ -100,9 +96,9 @@ std::vector BiDi::processText(const std::u16string& input, return applyLineBreaking(lineBreakPoints); } -std::u16string BiDi::getLine(int32_t start, int32_t end) { +std::u16string BiDi::getLine(std::size_t start, std::size_t end) { UErrorCode errorCode = U_ZERO_ERROR; - ubidi_setLine(impl->bidiText, start, end, impl->bidiLine, &errorCode); + ubidi_setLine(impl->bidiText, static_cast(start), static_cast(end), impl->bidiLine, &errorCode); if (U_FAILURE(errorCode)) { throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode)); diff --git a/platform/qt/src/bidi.cpp b/platform/qt/src/bidi.cpp index efb6932252..0eaeb3f2f4 100644 --- a/platform/qt/src/bidi.cpp +++ b/platform/qt/src/bidi.cpp @@ -16,17 +16,17 @@ std::u16string applyArabicShaping(const std::u16string& input) { return utf16string.toStdU16String(); } -void BiDi::mergeParagraphLineBreaks(std::set& lineBreakPoints) { - lineBreakPoints.insert(bidi.impl->string.length()); +void BiDi::mergeParagraphLineBreaks(std::set& lineBreakPoints) { + lineBreakPoints.insert(static_cast(bidi.impl->string.length())); } std::vector -BiDi::applyLineBreaking(std::set lineBreakPoints) { +BiDi::applyLineBreaking(std::set lineBreakPoints) { mergeParagraphLineBreaks(lineBreakPoints); std::vector transformedLines; - int32_t start = 0; - for (int32_t lineBreakPoint : lineBreakPoints) { + std::size_t start = 0; + for (std::size_t lineBreakPoint : lineBreakPoints) { transformedLines.push_back(bidi.getLine(start, lineBreakPoint)); start = lineBreakPoint; } @@ -38,16 +38,15 @@ BiDi::BiDi() : impl(std::make_unique()) { } -BiDi::~BiDi() { -} +BiDi::~BiDi() = default; -std::vector BiDi::processText(const std::u16string& input, std::set lineBreakPoints) { +std::vector BiDi::processText(const std::u16string& input, std::set lineBreakPoints) { impl->string = QString::fromStdU16String(input); return applyLineBreaking(lineBreakPoints); } -std::u16string BiDi::getLine(int32_t start, int32_t end) { - return impl->string.mid(start, end - start).toStdU16String(); +std::u16string BiDi::getLine(std::size_t start, std::size_t end) { + return impl->string.mid(static_cast(start), static_cast(end - start)).toStdU16String(); } } // end namespace mbgl diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp index 73bb556a00..59d306489c 100644 --- a/src/mbgl/text/bidi.hpp +++ b/src/mbgl/text/bidi.hpp @@ -19,12 +19,12 @@ public: BiDi(); ~BiDi(); - std::vector processText(const std::u16string&, std::set); + std::vector processText(const std::u16string&, std::set); private: - void mergeParagraphLineBreaks(std::set&); - std::vector applyLineBreaking(std::set); - std::u16string getLine(int32_t start, int32_t end); + void mergeParagraphLineBreaks(std::set&); + std::vector applyLineBreaking(std::set); + std::u16string getLine(std::size_t start, std::size_t end); std::unique_ptr impl; }; diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp index 537d9c0579..0040926103 100644 --- a/src/mbgl/text/glyph_set.cpp +++ b/src/mbgl/text/glyph_set.cpp @@ -64,7 +64,7 @@ void align(Shaping& shaping, const float verticalAlign, const float maxLineLength, const float lineHeight, - const uint32_t lineCount, + const std::size_t lineCount, const Point& translate) { const float shiftX = (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */); @@ -80,8 +80,8 @@ void align(Shaping& shaping, // justify left = 0, right = 1, center = .5 void justifyLine(std::vector& positionedGlyphs, const std::map& sdfs, - uint32_t start, - uint32_t end, + std::size_t start, + std::size_t end, float justify) { if (!justify) { return; @@ -93,34 +93,104 @@ void justifyLine(std::vector& positionedGlyphs, const uint32_t lastAdvance = it->second.metrics.advance; const float lineIndent = float(glyph.x + lastAdvance) * justify; - for (uint32_t j = start; j <= end; j++) { + for (std::size_t j = start; j <= end; j++) { positionedGlyphs[j].x -= lineIndent; } } } -float GlyphSet::determineIdeographicLineWidth(const std::u16string& logicalInput, +float GlyphSet::determineAverageLineWidth(const std::u16string& logicalInput, const float spacing, float maxWidth) const { float totalWidth = 0; - // totalWidth doesn't include the last character for magical tuning reasons. This makes the - // algorithm a little more agressive about trying to fit the text into fewer lines, taking - // advantage of the tolerance for going a little over maxWidth - for (uint32_t i = 0; i < logicalInput.size() - 1; i++) { - auto it = sdfs.find(logicalInput[i]); + for (char16_t chr : logicalInput) { + auto it = sdfs.find(chr); if (it != sdfs.end()) { totalWidth += it->second.metrics.advance + spacing; } } - int32_t lineCount = std::fmax(1, std::ceil(totalWidth / maxWidth)); - return totalWidth / lineCount; + int32_t targetLineCount = std::fmax(1, std::ceil(totalWidth / maxWidth)); + return totalWidth / targetLineCount; +} + +float calculateBadness(const float lineWidth, const float targetWidth, const float penalty, const bool isLastBreak) { + const float raggedness = std::pow(lineWidth - targetWidth, 2); + if (isLastBreak && lineWidth < targetWidth) { + // Be more tolerant of short final lines + return std::fmax(0, raggedness - 150); + } + if (penalty < 0) { + return raggedness - std::pow(penalty, 2); + } + return raggedness + std::pow(penalty, 2); +} + +float calculatePenalty(char16_t codePoint, char16_t previousCodePoint) { + float penalty = 0; + // Force break on newline + if (codePoint == 0x0a) { + penalty -= 10000; + } + // Penalize open parenthesis at end of line + if (previousCodePoint && (previousCodePoint == 0x28 || previousCodePoint == 0xff08)) { + penalty += 50; + } + // Penalize close parenthesis at beginning of line + if (codePoint == 0x29 || codePoint == 0xff09) { + penalty += 50; + } + return penalty; +} + +struct PotentialBreak { + PotentialBreak(const std::size_t p_index, const float p_x, const PotentialBreak* p_priorBreak, const float p_badness) + : index(p_index), x(p_x), priorBreak(p_priorBreak), badness(p_badness) + {} + + const std::size_t index; + const float x; + const PotentialBreak* priorBreak; + const float badness; +}; + + +PotentialBreak evaluateBreak(const std::size_t breakIndex, const float breakX, const float targetWidth, const std::list& potentialBreaks, const float penalty, const bool isLastBreak) { + // We could skip evaluating breaks where the line length (breakX - priorBreak.x) > maxWidth + // ...but in fact we allow lines longer than maxWidth (if there's no break points) + // ...and when targetWidth and maxWidth are close, strictly enforcing maxWidth can give + // more lopsided results. + + const PotentialBreak* bestPriorBreak = nullptr; + float bestBreakBadness = calculateBadness(breakX, targetWidth, penalty, isLastBreak); + + for (const auto& potentialBreak : potentialBreaks) { + const float lineWidth = breakX - potentialBreak.x; + float breakBadness = calculateBadness(lineWidth, targetWidth, penalty, isLastBreak) + potentialBreak.badness; + if (breakBadness <= bestBreakBadness) { + bestPriorBreak = &potentialBreak; + bestBreakBadness = breakBadness; + } + } + + return PotentialBreak(breakIndex, breakX, bestPriorBreak, bestBreakBadness); +} + +std::set leastBadBreaks(const PotentialBreak& lastLineBreak) { + std::set leastBadBreaks = { lastLineBreak.index }; + const PotentialBreak* priorBreak = lastLineBreak.priorBreak; + while (priorBreak) { + leastBadBreaks.insert(priorBreak->index); + priorBreak = priorBreak->priorBreak; + } + return leastBadBreaks; } + // We determine line breaks based on shaped text in logical order. Working in visual order would be // more intuitive, but we can't do that because the visual order may be changed by line breaks! -std::set GlyphSet::determineLineBreaks(const std::u16string& logicalInput, +std::set GlyphSet::determineLineBreaks(const std::u16string& logicalInput, const float spacing, float maxWidth) const { if (!maxWidth) { @@ -130,42 +200,34 @@ std::set GlyphSet::determineLineBreaks(const std::u16string& logicalInp if (logicalInput.empty()) { return {}; } - - if (util::i18n::allowsIdeographicBreaking(logicalInput)) { - maxWidth = determineIdeographicLineWidth(logicalInput, spacing, maxWidth); - } - - std::set lineBreakPoints; + + const float targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth); + + std::list potentialBreaks; float currentX = 0; - uint32_t lastSafeBreak = 0; - float lastSafeBreakX = 0; - for (uint32_t i = 0; i < logicalInput.size(); i++) { - auto it = sdfs.find(logicalInput[i]); + for (std::size_t i = 0; i < logicalInput.size(); i++) { + const char16_t codePoint = logicalInput[i]; + auto it = sdfs.find(codePoint); if (it == sdfs.end()) { continue; } - const SDFGlyph& glyph = it->second; - // Ideographic characters, spaces, and word-breaking punctuation that often appear without // surrounding spaces. - if (util::i18n::allowsWordBreaking(glyph.id) || - util::i18n::allowsIdeographicBreaking(glyph.id)) { - lastSafeBreak = i; - lastSafeBreakX = currentX; - } + if (util::i18n::allowsWordBreaking(codePoint) || + util::i18n::allowsIdeographicBreaking(codePoint)) { + const char16_t previousCodePoint = i > 0 ? logicalInput[i-1] : 0; - if (currentX > maxWidth && lastSafeBreak > 0) { - lineBreakPoints.insert(lastSafeBreak); - currentX -= lastSafeBreakX; - lastSafeBreakX = 0; + potentialBreaks.push_back(evaluateBreak(i, currentX, targetWidth, potentialBreaks, + calculatePenalty(codePoint, previousCodePoint), + false)); } - currentX += glyph.metrics.advance + spacing; + currentX += it->second.metrics.advance + spacing; } - return lineBreakPoints; + return leastBadBreaks(evaluateBreak(logicalInput.size(), currentX, targetWidth, potentialBreaks, 0, true)); } void GlyphSet::shapeLines(Shaping& shaping, @@ -194,7 +256,7 @@ void GlyphSet::shapeLines(Shaping& shaping, continue; } - uint32_t lineStartIndex = static_cast(shaping.positionedGlyphs.size()); + std::size_t lineStartIndex = shaping.positionedGlyphs.size(); for (char16_t chr : line) { auto it = sdfs.find(chr); if (it == sdfs.end()) { @@ -207,12 +269,12 @@ void GlyphSet::shapeLines(Shaping& shaping, } // Only justify if we placed at least one glyph - if (static_cast(shaping.positionedGlyphs.size()) != lineStartIndex) { + if (shaping.positionedGlyphs.size() != lineStartIndex) { float lineLength = x - spacing; // Don't count trailing spacing maxLineLength = util::max(lineLength, maxLineLength); justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex, - static_cast(shaping.positionedGlyphs.size()) - 1, justify); + shaping.positionedGlyphs.size() - 1, justify); } x = 0; @@ -220,7 +282,7 @@ void GlyphSet::shapeLines(Shaping& shaping, } align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, - static_cast(lines.size()), translate); + lines.size(), translate); const uint32_t height = lines.size() * lineHeight; // Calculate the bounding box diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp index b48973b6ea..3037cefca0 100644 --- a/src/mbgl/text/glyph_set.hpp +++ b/src/mbgl/text/glyph_set.hpp @@ -21,10 +21,10 @@ public: BiDi& bidi) const; private: - float determineIdeographicLineWidth(const std::u16string& logicalInput, + float determineAverageLineWidth(const std::u16string& logicalInput, const float spacing, float maxWidth) const; - std::set determineLineBreaks(const std::u16string& logicalInput, + std::set determineLineBreaks(const std::u16string& logicalInput, const float spacing, float maxWidth) const; diff --git a/src/mbgl/util/i18n.cpp b/src/mbgl/util/i18n.cpp index 4be624d2e5..33ce5e22de 100644 --- a/src/mbgl/util/i18n.cpp +++ b/src/mbgl/util/i18n.cpp @@ -298,6 +298,8 @@ bool allowsWordBreaking(uint16_t chr) { return (chr == 0x0a /* newline */ || chr == 0x20 /* space */ || chr == 0x26 /* ampersand */ + || chr == 0x28 /* open parenthesis */ + || chr == 0x29 /* close parenthesis */ || chr == 0x2b /* plus sign */ || chr == 0x2d /* hyphen-minus */ || chr == 0x2f /* solidus */ -- cgit v1.2.1