diff options
author | Chris Loer <chris.loer@gmail.com> | 2017-01-05 08:47:02 -0800 |
---|---|---|
committer | Chris Loer <chris.loer@gmail.com> | 2017-01-05 10:22:53 -0800 |
commit | 12a9932ebcbefd6da74de8c4b3d1e07bb0016e2b (patch) | |
tree | 66f40c182319c36d249d414e6def26476557f905 | |
parent | 35ccb01dbdf95595591c7b2bf3c4a0882a1b9be7 (diff) | |
download | qtlocation-mapboxgl-12a9932ebcbefd6da74de8c4b3d1e07bb0016e2b.tar.gz |
Port raggedness-minimizing line breaking from gl-js.
-rw-r--r-- | package.json | 2 | ||||
-rw-r--r-- | platform/default/bidi.cpp | 24 | ||||
-rw-r--r-- | platform/qt/src/bidi.cpp | 19 | ||||
-rw-r--r-- | src/mbgl/text/bidi.hpp | 8 | ||||
-rw-r--r-- | src/mbgl/text/glyph_set.cpp | 142 | ||||
-rw-r--r-- | src/mbgl/text/glyph_set.hpp | 4 | ||||
-rw-r--r-- | src/mbgl/util/i18n.cpp | 2 |
7 files changed, 130 insertions, 71 deletions
diff --git a/package.json b/package.json index edfc807f11..59562f041f 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "express": "^4.11.1", "lodash": "^4.16.4", "mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#49e8b407bdbbe6f7c92dbcb56d3d51f425fc2653", - "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#9c7063dc649821d03e6851dfa51fb9628c479f86", + "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#f20a94e329e9af1abf42044a0dd1b96925cf629b", "mkdirp": "^0.5.1", "node-cmake": "^1.2.1", "pixelmatch": "^4.0.2", diff --git a/platform/default/bidi.cpp b/platform/default/bidi.cpp index 9de6328a05..25b4dbe3a7 100644 --- a/platform/default/bidi.cpp +++ b/platform/default/bidi.cpp @@ -19,12 +19,8 @@ public: UBiDi* bidiLine = nullptr; }; -BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) -{ -} - -BiDi::~BiDi() { -} +BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {} +BiDi::~BiDi() = default; // Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining // logical order. Output won't be intelligible until the bidirectional algorithm is applied @@ -53,7 +49,7 @@ std::u16string applyArabicShaping(const std::u16string& input) { return std::u16string(outputText.get(), outputLength); } -void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) { +void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) { int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText); for (int32_t i = 0; i < paragraphCount; i++) { UErrorCode errorCode = U_ZERO_ERROR; @@ -65,11 +61,11 @@ void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) { u_errorName(errorCode)); } - lineBreakPoints.insert(paragraphEndIndex); + lineBreakPoints.insert(static_cast<std::size_t>(paragraphEndIndex)); } } -std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) { +std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) { // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all // paragraph boundaries are included in the set of line break points. The calling code might not // include the line break because it didn't need to wrap at that point, or because the text was @@ -77,8 +73,8 @@ std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakP mergeParagraphLineBreaks(lineBreakPoints); std::vector<std::u16string> transformedLines; - int32_t start = 0; - for (int32_t lineBreakPoint : lineBreakPoints) { + std::size_t start = 0; + for (std::size_t lineBreakPoint : lineBreakPoints) { transformedLines.push_back(getLine(start, lineBreakPoint)); start = lineBreakPoint; } @@ -87,7 +83,7 @@ std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakP } std::vector<std::u16string> BiDi::processText(const std::u16string& input, - std::set<int32_t> lineBreakPoints) { + std::set<std::size_t> lineBreakPoints) { UErrorCode errorCode = U_ZERO_ERROR; ubidi_setPara(impl->bidiText, input.c_str(), static_cast<int32_t>(input.size()), @@ -100,9 +96,9 @@ std::vector<std::u16string> BiDi::processText(const std::u16string& input, return applyLineBreaking(lineBreakPoints); } -std::u16string BiDi::getLine(int32_t start, int32_t end) { +std::u16string BiDi::getLine(std::size_t start, std::size_t end) { UErrorCode errorCode = U_ZERO_ERROR; - ubidi_setLine(impl->bidiText, start, end, impl->bidiLine, &errorCode); + ubidi_setLine(impl->bidiText, static_cast<int32_t>(start), static_cast<int32_t>(end), impl->bidiLine, &errorCode); if (U_FAILURE(errorCode)) { throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode)); diff --git a/platform/qt/src/bidi.cpp b/platform/qt/src/bidi.cpp index efb6932252..0eaeb3f2f4 100644 --- a/platform/qt/src/bidi.cpp +++ b/platform/qt/src/bidi.cpp @@ -16,17 +16,17 @@ std::u16string applyArabicShaping(const std::u16string& input) { return utf16string.toStdU16String(); } -void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) { - lineBreakPoints.insert(bidi.impl->string.length()); +void BiDi::mergeParagraphLineBreaks(std::set<std::size_t>& lineBreakPoints) { + lineBreakPoints.insert(static_cast<std::size_t>(bidi.impl->string.length())); } std::vector<std::u16string> -BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) { +BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) { mergeParagraphLineBreaks(lineBreakPoints); std::vector<std::u16string> transformedLines; - int32_t start = 0; - for (int32_t lineBreakPoint : lineBreakPoints) { + std::size_t start = 0; + for (std::size_t lineBreakPoint : lineBreakPoints) { transformedLines.push_back(bidi.getLine(start, lineBreakPoint)); start = lineBreakPoint; } @@ -38,16 +38,15 @@ BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) { } -BiDi::~BiDi() { -} +BiDi::~BiDi() = default; -std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<int32_t> lineBreakPoints) { +std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<std::size_t> lineBreakPoints) { impl->string = QString::fromStdU16String(input); return applyLineBreaking(lineBreakPoints); } -std::u16string BiDi::getLine(int32_t start, int32_t end) { - return impl->string.mid(start, end - start).toStdU16String(); +std::u16string BiDi::getLine(std::size_t start, std::size_t end) { + return impl->string.mid(static_cast<int32_t>(start), static_cast<int32_t>(end - start)).toStdU16String(); } } // end namespace mbgl diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp index 73bb556a00..59d306489c 100644 --- a/src/mbgl/text/bidi.hpp +++ b/src/mbgl/text/bidi.hpp @@ -19,12 +19,12 @@ public: BiDi(); ~BiDi(); - std::vector<std::u16string> processText(const std::u16string&, std::set<int32_t>); + std::vector<std::u16string> processText(const std::u16string&, std::set<std::size_t>); private: - void mergeParagraphLineBreaks(std::set<int32_t>&); - std::vector<std::u16string> applyLineBreaking(std::set<int32_t>); - std::u16string getLine(int32_t start, int32_t end); + void mergeParagraphLineBreaks(std::set<std::size_t>&); + std::vector<std::u16string> applyLineBreaking(std::set<std::size_t>); + std::u16string getLine(std::size_t start, std::size_t end); std::unique_ptr<BiDiImpl> impl; }; diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp index 537d9c0579..0040926103 100644 --- a/src/mbgl/text/glyph_set.cpp +++ b/src/mbgl/text/glyph_set.cpp @@ -64,7 +64,7 @@ void align(Shaping& shaping, const float verticalAlign, const float maxLineLength, const float lineHeight, - const uint32_t lineCount, + const std::size_t lineCount, const Point<float>& translate) { const float shiftX = (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */); @@ -80,8 +80,8 @@ void align(Shaping& shaping, // justify left = 0, right = 1, center = .5 void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs, const std::map<uint32_t, SDFGlyph>& sdfs, - uint32_t start, - uint32_t end, + std::size_t start, + std::size_t end, float justify) { if (!justify) { return; @@ -93,34 +93,104 @@ void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs, const uint32_t lastAdvance = it->second.metrics.advance; const float lineIndent = float(glyph.x + lastAdvance) * justify; - for (uint32_t j = start; j <= end; j++) { + for (std::size_t j = start; j <= end; j++) { positionedGlyphs[j].x -= lineIndent; } } } -float GlyphSet::determineIdeographicLineWidth(const std::u16string& logicalInput, +float GlyphSet::determineAverageLineWidth(const std::u16string& logicalInput, const float spacing, float maxWidth) const { float totalWidth = 0; - // totalWidth doesn't include the last character for magical tuning reasons. This makes the - // algorithm a little more agressive about trying to fit the text into fewer lines, taking - // advantage of the tolerance for going a little over maxWidth - for (uint32_t i = 0; i < logicalInput.size() - 1; i++) { - auto it = sdfs.find(logicalInput[i]); + for (char16_t chr : logicalInput) { + auto it = sdfs.find(chr); if (it != sdfs.end()) { totalWidth += it->second.metrics.advance + spacing; } } - int32_t lineCount = std::fmax(1, std::ceil(totalWidth / maxWidth)); - return totalWidth / lineCount; + int32_t targetLineCount = std::fmax(1, std::ceil(totalWidth / maxWidth)); + return totalWidth / targetLineCount; +} + +float calculateBadness(const float lineWidth, const float targetWidth, const float penalty, const bool isLastBreak) { + const float raggedness = std::pow(lineWidth - targetWidth, 2); + if (isLastBreak && lineWidth < targetWidth) { + // Be more tolerant of short final lines + return std::fmax(0, raggedness - 150); + } + if (penalty < 0) { + return raggedness - std::pow(penalty, 2); + } + return raggedness + std::pow(penalty, 2); +} + +float calculatePenalty(char16_t codePoint, char16_t previousCodePoint) { + float penalty = 0; + // Force break on newline + if (codePoint == 0x0a) { + penalty -= 10000; + } + // Penalize open parenthesis at end of line + if (previousCodePoint && (previousCodePoint == 0x28 || previousCodePoint == 0xff08)) { + penalty += 50; + } + // Penalize close parenthesis at beginning of line + if (codePoint == 0x29 || codePoint == 0xff09) { + penalty += 50; + } + return penalty; +} + +struct PotentialBreak { + PotentialBreak(const std::size_t p_index, const float p_x, const PotentialBreak* p_priorBreak, const float p_badness) + : index(p_index), x(p_x), priorBreak(p_priorBreak), badness(p_badness) + {} + + const std::size_t index; + const float x; + const PotentialBreak* priorBreak; + const float badness; +}; + + +PotentialBreak evaluateBreak(const std::size_t breakIndex, const float breakX, const float targetWidth, const std::list<PotentialBreak>& potentialBreaks, const float penalty, const bool isLastBreak) { + // We could skip evaluating breaks where the line length (breakX - priorBreak.x) > maxWidth + // ...but in fact we allow lines longer than maxWidth (if there's no break points) + // ...and when targetWidth and maxWidth are close, strictly enforcing maxWidth can give + // more lopsided results. + + const PotentialBreak* bestPriorBreak = nullptr; + float bestBreakBadness = calculateBadness(breakX, targetWidth, penalty, isLastBreak); + + for (const auto& potentialBreak : potentialBreaks) { + const float lineWidth = breakX - potentialBreak.x; + float breakBadness = calculateBadness(lineWidth, targetWidth, penalty, isLastBreak) + potentialBreak.badness; + if (breakBadness <= bestBreakBadness) { + bestPriorBreak = &potentialBreak; + bestBreakBadness = breakBadness; + } + } + + return PotentialBreak(breakIndex, breakX, bestPriorBreak, bestBreakBadness); +} + +std::set<std::size_t> leastBadBreaks(const PotentialBreak& lastLineBreak) { + std::set<std::size_t> leastBadBreaks = { lastLineBreak.index }; + const PotentialBreak* priorBreak = lastLineBreak.priorBreak; + while (priorBreak) { + leastBadBreaks.insert(priorBreak->index); + priorBreak = priorBreak->priorBreak; + } + return leastBadBreaks; } + // We determine line breaks based on shaped text in logical order. Working in visual order would be // more intuitive, but we can't do that because the visual order may be changed by line breaks! -std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput, +std::set<std::size_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput, const float spacing, float maxWidth) const { if (!maxWidth) { @@ -130,42 +200,34 @@ std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInp if (logicalInput.empty()) { return {}; } - - if (util::i18n::allowsIdeographicBreaking(logicalInput)) { - maxWidth = determineIdeographicLineWidth(logicalInput, spacing, maxWidth); - } - - std::set<int32_t> lineBreakPoints; + + const float targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth); + + std::list<PotentialBreak> potentialBreaks; float currentX = 0; - uint32_t lastSafeBreak = 0; - float lastSafeBreakX = 0; - for (uint32_t i = 0; i < logicalInput.size(); i++) { - auto it = sdfs.find(logicalInput[i]); + for (std::size_t i = 0; i < logicalInput.size(); i++) { + const char16_t codePoint = logicalInput[i]; + auto it = sdfs.find(codePoint); if (it == sdfs.end()) { continue; } - const SDFGlyph& glyph = it->second; - // Ideographic characters, spaces, and word-breaking punctuation that often appear without // surrounding spaces. - if (util::i18n::allowsWordBreaking(glyph.id) || - util::i18n::allowsIdeographicBreaking(glyph.id)) { - lastSafeBreak = i; - lastSafeBreakX = currentX; - } + if (util::i18n::allowsWordBreaking(codePoint) || + util::i18n::allowsIdeographicBreaking(codePoint)) { + const char16_t previousCodePoint = i > 0 ? logicalInput[i-1] : 0; - if (currentX > maxWidth && lastSafeBreak > 0) { - lineBreakPoints.insert(lastSafeBreak); - currentX -= lastSafeBreakX; - lastSafeBreakX = 0; + potentialBreaks.push_back(evaluateBreak(i, currentX, targetWidth, potentialBreaks, + calculatePenalty(codePoint, previousCodePoint), + false)); } - currentX += glyph.metrics.advance + spacing; + currentX += it->second.metrics.advance + spacing; } - return lineBreakPoints; + return leastBadBreaks(evaluateBreak(logicalInput.size(), currentX, targetWidth, potentialBreaks, 0, true)); } void GlyphSet::shapeLines(Shaping& shaping, @@ -194,7 +256,7 @@ void GlyphSet::shapeLines(Shaping& shaping, continue; } - uint32_t lineStartIndex = static_cast<uint32_t>(shaping.positionedGlyphs.size()); + std::size_t lineStartIndex = shaping.positionedGlyphs.size(); for (char16_t chr : line) { auto it = sdfs.find(chr); if (it == sdfs.end()) { @@ -207,12 +269,12 @@ void GlyphSet::shapeLines(Shaping& shaping, } // Only justify if we placed at least one glyph - if (static_cast<uint32_t>(shaping.positionedGlyphs.size()) != lineStartIndex) { + if (shaping.positionedGlyphs.size() != lineStartIndex) { float lineLength = x - spacing; // Don't count trailing spacing maxLineLength = util::max(lineLength, maxLineLength); justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex, - static_cast<uint32_t>(shaping.positionedGlyphs.size()) - 1, justify); + shaping.positionedGlyphs.size() - 1, justify); } x = 0; @@ -220,7 +282,7 @@ void GlyphSet::shapeLines(Shaping& shaping, } align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, - static_cast<uint32_t>(lines.size()), translate); + lines.size(), translate); const uint32_t height = lines.size() * lineHeight; // Calculate the bounding box diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp index b48973b6ea..3037cefca0 100644 --- a/src/mbgl/text/glyph_set.hpp +++ b/src/mbgl/text/glyph_set.hpp @@ -21,10 +21,10 @@ public: BiDi& bidi) const; private: - float determineIdeographicLineWidth(const std::u16string& logicalInput, + float determineAverageLineWidth(const std::u16string& logicalInput, const float spacing, float maxWidth) const; - std::set<int32_t> determineLineBreaks(const std::u16string& logicalInput, + std::set<std::size_t> determineLineBreaks(const std::u16string& logicalInput, const float spacing, float maxWidth) const; diff --git a/src/mbgl/util/i18n.cpp b/src/mbgl/util/i18n.cpp index 4be624d2e5..33ce5e22de 100644 --- a/src/mbgl/util/i18n.cpp +++ b/src/mbgl/util/i18n.cpp @@ -298,6 +298,8 @@ bool allowsWordBreaking(uint16_t chr) { return (chr == 0x0a /* newline */ || chr == 0x20 /* space */ || chr == 0x26 /* ampersand */ + || chr == 0x28 /* open parenthesis */ + || chr == 0x29 /* close parenthesis */ || chr == 0x2b /* plus sign */ || chr == 0x2d /* hyphen-minus */ || chr == 0x2f /* solidus */ |