summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Loer <chris.loer@gmail.com>2016-12-21 13:40:57 -0800
committerChris Loer <chris.loer@gmail.com>2016-12-21 13:55:49 -0800
commita88357d0e4013ec62d7d20e7a11ecd7d2b1e812f (patch)
tree0a0b8924fde5511f859b4d33f3eeab512139d7b9
parent9a6f81a1efdf1218a7cb11839356dc7a081e69a6 (diff)
downloadqtlocation-mapboxgl-a88357d0e4013ec62d7d20e7a11ecd7d2b1e812f.tar.gz
Port raggedness-minimizing line breaking from gl-js.
-rw-r--r--package.json2
-rw-r--r--platform/default/bidi.cpp24
-rw-r--r--platform/qt/src/bidi.cpp19
-rw-r--r--src/mbgl/text/bidi.hpp8
-rw-r--r--src/mbgl/text/glyph_set.cpp142
-rw-r--r--src/mbgl/text/glyph_set.hpp4
-rw-r--r--src/mbgl/util/i18n.cpp2
7 files changed, 130 insertions, 71 deletions
diff --git a/package.json b/package.json
index f99653d184..02f1996ddb 100644
--- a/package.json
+++ b/package.json
@@ -24,7 +24,7 @@
"lodash": "^4.16.4",
"mapbox-gl": "mapbox/mapbox-gl-js#ab836206d415ca3a74257a3066d11a54ab2838cb",
"mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#49e8b407bdbbe6f7c92dbcb56d3d51f425fc2653",
- "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1ada02a706fd124fc3441fd3a2b3cda67960ff5",
+ "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1208969a7855e524bd174bddf4e46f0bb697cb6",
"mkdirp": "^0.5.1",
"node-cmake": "^1.2.1",
"pixelmatch": "^4.0.2",
diff --git a/platform/default/bidi.cpp b/platform/default/bidi.cpp
index 9de6328a05..25b4dbe3a7 100644
--- a/platform/default/bidi.cpp
+++ b/platform/default/bidi.cpp
@@ -19,12 +19,8 @@ public:
UBiDi* bidiLine = nullptr;
};
-BiDi::BiDi() : impl(std::make_unique<BiDiImpl>())
-{
-}
-
-BiDi::~BiDi() {
-}
+BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {}
+BiDi::~BiDi() = default;
// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
// logical order. Output won't be intelligible until the bidirectional algorithm is applied
@@ -53,7 +49,7 @@ std::u16string applyArabicShaping(const std::u16string& input) {
return std::u16string(outputText.get(), outputLength);
}
-void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
+void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) {
int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText);
for (int32_t i = 0; i < paragraphCount; i++) {
UErrorCode errorCode = U_ZERO_ERROR;
@@ -65,11 +61,11 @@ void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
u_errorName(errorCode));
}
- lineBreakPoints.insert(paragraphEndIndex);
+ lineBreakPoints.insert(static_cast<std::size_t>(paragraphEndIndex));
}
}
-std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
+std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
// BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all
// paragraph boundaries are included in the set of line break points. The calling code might not
// include the line break because it didn't need to wrap at that point, or because the text was
@@ -77,8 +73,8 @@ std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakP
mergeParagraphLineBreaks(lineBreakPoints);
std::vector<std::u16string> transformedLines;
- int32_t start = 0;
- for (int32_t lineBreakPoint : lineBreakPoints) {
+ std::size_t start = 0;
+ for (std::size_t lineBreakPoint : lineBreakPoints) {
transformedLines.push_back(getLine(start, lineBreakPoint));
start = lineBreakPoint;
}
@@ -87,7 +83,7 @@ std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakP
}
std::vector<std::u16string> BiDi::processText(const std::u16string& input,
- std::set<int32_t> lineBreakPoints) {
+ std::set<std::size_t> lineBreakPoints) {
UErrorCode errorCode = U_ZERO_ERROR;
ubidi_setPara(impl->bidiText, input.c_str(), static_cast<int32_t>(input.size()),
@@ -100,9 +96,9 @@ std::vector<std::u16string> BiDi::processText(const std::u16string& input,
return applyLineBreaking(lineBreakPoints);
}
-std::u16string BiDi::getLine(int32_t start, int32_t end) {
+std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
UErrorCode errorCode = U_ZERO_ERROR;
- ubidi_setLine(impl->bidiText, start, end, impl->bidiLine, &errorCode);
+ ubidi_setLine(impl->bidiText, static_cast<int32_t>(start), static_cast<int32_t>(end), impl->bidiLine, &errorCode);
if (U_FAILURE(errorCode)) {
throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode));
diff --git a/platform/qt/src/bidi.cpp b/platform/qt/src/bidi.cpp
index efb6932252..0eaeb3f2f4 100644
--- a/platform/qt/src/bidi.cpp
+++ b/platform/qt/src/bidi.cpp
@@ -16,17 +16,17 @@ std::u16string applyArabicShaping(const std::u16string& input) {
return utf16string.toStdU16String();
}
-void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
- lineBreakPoints.insert(bidi.impl->string.length());
+void BiDi::mergeParagraphLineBreaks(std::set<std::size_t>& lineBreakPoints) {
+ lineBreakPoints.insert(static_cast<std::size_t>(bidi.impl->string.length()));
}
std::vector<std::u16string>
-BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
+BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
mergeParagraphLineBreaks(lineBreakPoints);
std::vector<std::u16string> transformedLines;
- int32_t start = 0;
- for (int32_t lineBreakPoint : lineBreakPoints) {
+ std::size_t start = 0;
+ for (std::size_t lineBreakPoint : lineBreakPoints) {
transformedLines.push_back(bidi.getLine(start, lineBreakPoint));
start = lineBreakPoint;
}
@@ -38,16 +38,15 @@ BiDi::BiDi() : impl(std::make_unique<BiDiImpl>())
{
}
-BiDi::~BiDi() {
-}
+BiDi::~BiDi() = default;
-std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<int32_t> lineBreakPoints) {
+std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<std::size_t> lineBreakPoints) {
impl->string = QString::fromStdU16String(input);
return applyLineBreaking(lineBreakPoints);
}
-std::u16string BiDi::getLine(int32_t start, int32_t end) {
- return impl->string.mid(start, end - start).toStdU16String();
+std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
+ return impl->string.mid(static_cast<int32_t>(start), static_cast<int32_t>(end - start)).toStdU16String();
}
} // end namespace mbgl
diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp
index 73bb556a00..59d306489c 100644
--- a/src/mbgl/text/bidi.hpp
+++ b/src/mbgl/text/bidi.hpp
@@ -19,12 +19,12 @@ public:
BiDi();
~BiDi();
- std::vector<std::u16string> processText(const std::u16string&, std::set<int32_t>);
+ std::vector<std::u16string> processText(const std::u16string&, std::set<std::size_t>);
private:
- void mergeParagraphLineBreaks(std::set<int32_t>&);
- std::vector<std::u16string> applyLineBreaking(std::set<int32_t>);
- std::u16string getLine(int32_t start, int32_t end);
+ void mergeParagraphLineBreaks(std::set<std::size_t>&);
+ std::vector<std::u16string> applyLineBreaking(std::set<std::size_t>);
+ std::u16string getLine(std::size_t start, std::size_t end);
std::unique_ptr<BiDiImpl> impl;
};
diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp
index 537d9c0579..0040926103 100644
--- a/src/mbgl/text/glyph_set.cpp
+++ b/src/mbgl/text/glyph_set.cpp
@@ -64,7 +64,7 @@ void align(Shaping& shaping,
const float verticalAlign,
const float maxLineLength,
const float lineHeight,
- const uint32_t lineCount,
+ const std::size_t lineCount,
const Point<float>& translate) {
const float shiftX =
(justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */);
@@ -80,8 +80,8 @@ void align(Shaping& shaping,
// justify left = 0, right = 1, center = .5
void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs,
const std::map<uint32_t, SDFGlyph>& sdfs,
- uint32_t start,
- uint32_t end,
+ std::size_t start,
+ std::size_t end,
float justify) {
if (!justify) {
return;
@@ -93,34 +93,104 @@ void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs,
const uint32_t lastAdvance = it->second.metrics.advance;
const float lineIndent = float(glyph.x + lastAdvance) * justify;
- for (uint32_t j = start; j <= end; j++) {
+ for (std::size_t j = start; j <= end; j++) {
positionedGlyphs[j].x -= lineIndent;
}
}
}
-float GlyphSet::determineIdeographicLineWidth(const std::u16string& logicalInput,
+float GlyphSet::determineAverageLineWidth(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const {
float totalWidth = 0;
- // totalWidth doesn't include the last character for magical tuning reasons. This makes the
- // algorithm a little more agressive about trying to fit the text into fewer lines, taking
- // advantage of the tolerance for going a little over maxWidth
- for (uint32_t i = 0; i < logicalInput.size() - 1; i++) {
- auto it = sdfs.find(logicalInput[i]);
+ for (char16_t chr : logicalInput) {
+ auto it = sdfs.find(chr);
if (it != sdfs.end()) {
totalWidth += it->second.metrics.advance + spacing;
}
}
- int32_t lineCount = std::fmax(1, std::ceil(totalWidth / maxWidth));
- return totalWidth / lineCount;
+ int32_t targetLineCount = std::fmax(1, std::ceil(totalWidth / maxWidth));
+ return totalWidth / targetLineCount;
+}
+
+float calculateBadness(const float lineWidth, const float targetWidth, const float penalty, const bool isLastBreak) {
+ const float raggedness = std::pow(lineWidth - targetWidth, 2);
+ if (isLastBreak && lineWidth < targetWidth) {
+ // Be more tolerant of short final lines
+ return std::fmax(0, raggedness - 150);
+ }
+ if (penalty < 0) {
+ return raggedness - std::pow(penalty, 2);
+ }
+ return raggedness + std::pow(penalty, 2);
+}
+
+float calculatePenalty(char16_t codePoint, char16_t previousCodePoint) {
+ float penalty = 0;
+ // Force break on newline
+ if (codePoint == 0x0a) {
+ penalty -= 10000;
+ }
+ // Penalize open parenthesis at end of line
+ if (previousCodePoint && (previousCodePoint == 0x28 || previousCodePoint == 0xff08)) {
+ penalty += 50;
+ }
+ // Penalize close parenthesis at beginning of line
+ if (codePoint == 0x29 || codePoint == 0xff09) {
+ penalty += 50;
+ }
+ return penalty;
+}
+
+struct PotentialBreak {
+ PotentialBreak(const std::size_t p_index, const float p_x, const PotentialBreak* p_priorBreak, const float p_badness)
+ : index(p_index), x(p_x), priorBreak(p_priorBreak), badness(p_badness)
+ {}
+
+ const std::size_t index;
+ const float x;
+ const PotentialBreak* priorBreak;
+ const float badness;
+};
+
+
+PotentialBreak evaluateBreak(const std::size_t breakIndex, const float breakX, const float targetWidth, const std::list<PotentialBreak>& potentialBreaks, const float penalty, const bool isLastBreak) {
+ // We could skip evaluating breaks where the line length (breakX - priorBreak.x) > maxWidth
+ // ...but in fact we allow lines longer than maxWidth (if there's no break points)
+ // ...and when targetWidth and maxWidth are close, strictly enforcing maxWidth can give
+ // more lopsided results.
+
+ const PotentialBreak* bestPriorBreak = nullptr;
+ float bestBreakBadness = calculateBadness(breakX, targetWidth, penalty, isLastBreak);
+
+ for (const auto& potentialBreak : potentialBreaks) {
+ const float lineWidth = breakX - potentialBreak.x;
+ float breakBadness = calculateBadness(lineWidth, targetWidth, penalty, isLastBreak) + potentialBreak.badness;
+ if (breakBadness <= bestBreakBadness) {
+ bestPriorBreak = &potentialBreak;
+ bestBreakBadness = breakBadness;
+ }
+ }
+
+ return PotentialBreak(breakIndex, breakX, bestPriorBreak, bestBreakBadness);
+}
+
+std::set<std::size_t> leastBadBreaks(const PotentialBreak& lastLineBreak) {
+ std::set<std::size_t> leastBadBreaks = { lastLineBreak.index };
+ const PotentialBreak* priorBreak = lastLineBreak.priorBreak;
+ while (priorBreak) {
+ leastBadBreaks.insert(priorBreak->index);
+ priorBreak = priorBreak->priorBreak;
+ }
+ return leastBadBreaks;
}
+
// We determine line breaks based on shaped text in logical order. Working in visual order would be
// more intuitive, but we can't do that because the visual order may be changed by line breaks!
-std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput,
+std::set<std::size_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const {
if (!maxWidth) {
@@ -130,42 +200,34 @@ std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInp
if (logicalInput.empty()) {
return {};
}
-
- if (util::i18n::allowsIdeographicBreaking(logicalInput)) {
- maxWidth = determineIdeographicLineWidth(logicalInput, spacing, maxWidth);
- }
-
- std::set<int32_t> lineBreakPoints;
+
+ const float targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth);
+
+ std::list<PotentialBreak> potentialBreaks;
float currentX = 0;
- uint32_t lastSafeBreak = 0;
- float lastSafeBreakX = 0;
- for (uint32_t i = 0; i < logicalInput.size(); i++) {
- auto it = sdfs.find(logicalInput[i]);
+ for (std::size_t i = 0; i < logicalInput.size(); i++) {
+ const char16_t codePoint = logicalInput[i];
+ auto it = sdfs.find(codePoint);
if (it == sdfs.end()) {
continue;
}
- const SDFGlyph& glyph = it->second;
-
// Ideographic characters, spaces, and word-breaking punctuation that often appear without
// surrounding spaces.
- if (util::i18n::allowsWordBreaking(glyph.id) ||
- util::i18n::allowsIdeographicBreaking(glyph.id)) {
- lastSafeBreak = i;
- lastSafeBreakX = currentX;
- }
+ if (util::i18n::allowsWordBreaking(codePoint) ||
+ util::i18n::allowsIdeographicBreaking(codePoint)) {
+ const char16_t previousCodePoint = i > 0 ? logicalInput[i-1] : 0;
- if (currentX > maxWidth && lastSafeBreak > 0) {
- lineBreakPoints.insert(lastSafeBreak);
- currentX -= lastSafeBreakX;
- lastSafeBreakX = 0;
+ potentialBreaks.push_back(evaluateBreak(i, currentX, targetWidth, potentialBreaks,
+ calculatePenalty(codePoint, previousCodePoint),
+ false));
}
- currentX += glyph.metrics.advance + spacing;
+ currentX += it->second.metrics.advance + spacing;
}
- return lineBreakPoints;
+ return leastBadBreaks(evaluateBreak(logicalInput.size(), currentX, targetWidth, potentialBreaks, 0, true));
}
void GlyphSet::shapeLines(Shaping& shaping,
@@ -194,7 +256,7 @@ void GlyphSet::shapeLines(Shaping& shaping,
continue;
}
- uint32_t lineStartIndex = static_cast<uint32_t>(shaping.positionedGlyphs.size());
+ std::size_t lineStartIndex = shaping.positionedGlyphs.size();
for (char16_t chr : line) {
auto it = sdfs.find(chr);
if (it == sdfs.end()) {
@@ -207,12 +269,12 @@ void GlyphSet::shapeLines(Shaping& shaping,
}
// Only justify if we placed at least one glyph
- if (static_cast<uint32_t>(shaping.positionedGlyphs.size()) != lineStartIndex) {
+ if (shaping.positionedGlyphs.size() != lineStartIndex) {
float lineLength = x - spacing; // Don't count trailing spacing
maxLineLength = util::max(lineLength, maxLineLength);
justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex,
- static_cast<uint32_t>(shaping.positionedGlyphs.size()) - 1, justify);
+ shaping.positionedGlyphs.size() - 1, justify);
}
x = 0;
@@ -220,7 +282,7 @@ void GlyphSet::shapeLines(Shaping& shaping,
}
align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight,
- static_cast<uint32_t>(lines.size()), translate);
+ lines.size(), translate);
const uint32_t height = lines.size() * lineHeight;
// Calculate the bounding box
diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp
index b48973b6ea..3037cefca0 100644
--- a/src/mbgl/text/glyph_set.hpp
+++ b/src/mbgl/text/glyph_set.hpp
@@ -21,10 +21,10 @@ public:
BiDi& bidi) const;
private:
- float determineIdeographicLineWidth(const std::u16string& logicalInput,
+ float determineAverageLineWidth(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const;
- std::set<int32_t> determineLineBreaks(const std::u16string& logicalInput,
+ std::set<std::size_t> determineLineBreaks(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const;
diff --git a/src/mbgl/util/i18n.cpp b/src/mbgl/util/i18n.cpp
index 4be624d2e5..33ce5e22de 100644
--- a/src/mbgl/util/i18n.cpp
+++ b/src/mbgl/util/i18n.cpp
@@ -298,6 +298,8 @@ bool allowsWordBreaking(uint16_t chr) {
return (chr == 0x0a /* newline */
|| chr == 0x20 /* space */
|| chr == 0x26 /* ampersand */
+ || chr == 0x28 /* open parenthesis */
+ || chr == 0x29 /* close parenthesis */
|| chr == 0x2b /* plus sign */
|| chr == 0x2d /* hyphen-minus */
|| chr == 0x2f /* solidus */