summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Loer <chris.loer@gmail.com>2017-01-07 11:35:48 -0800
committerChris Loer <chris.loer@gmail.com>2017-01-09 10:02:39 -0800
commite8ec25f6259aea3fea539029aa84b83f64543c44 (patch)
treed806dcd96f276a393c256b4f1c5fcdb7622b6ec9
parent2e3f791f761eb1a1c6f9b6a52c438729d4340825 (diff)
downloadqtlocation-mapboxgl-upstream/ios-v3.4.0-arabic.tar.gz
[core] Use ICU for Arabic text support (upstream/ios-v3.4.0-arabic)
- Supports bidirectional text layout
- New "raggedness minimizing" line breaking algorithm
-rw-r--r--CMakeLists.txt1
-rw-r--r--cmake/core-files.cmake2
-rw-r--r--cmake/core.cmake1
-rw-r--r--platform/default/mbgl/storage/offline_download.cpp2
-rw-r--r--platform/default/string_stdlib.cpp2
-rw-r--r--src/mbgl/layout/merge_lines.cpp4
-rw-r--r--src/mbgl/layout/symbol_feature.hpp2
-rw-r--r--src/mbgl/layout/symbol_layout.cpp9
-rw-r--r--src/mbgl/layout/symbol_layout.hpp7
-rw-r--r--src/mbgl/text/bidi.cpp127
-rw-r--r--src/mbgl/text/bidi.hpp32
-rw-r--r--src/mbgl/text/glyph.cpp2
-rw-r--r--src/mbgl/text/glyph.hpp6
-rw-r--r--src/mbgl/text/glyph_atlas.cpp4
-rw-r--r--src/mbgl/text/glyph_atlas.hpp2
-rw-r--r--src/mbgl/text/glyph_set.cpp316
-rw-r--r--src/mbgl/text/glyph_set.hpp33
-rw-r--r--src/mbgl/util/utf.hpp15
-rw-r--r--test/util/merge_lines.test.cpp4
19 files changed, 447 insertions, 124 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5e99f8031e..08dd089924 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,7 @@ mason_use(earcut VERSION 0.12.1 HEADER_ONLY)
mason_use(protozero VERSION 1.4.2 HEADER_ONLY)
mason_use(pixelmatch VERSION 0.10.0 HEADER_ONLY)
mason_use(geojson VERSION 0.3.2 HEADER_ONLY)
+mason_use(icu VERSION 58.1)
if(WITH_COVERAGE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
diff --git a/cmake/core-files.cmake b/cmake/core-files.cmake
index 4d5cddbc49..2c2686a02d 100644
--- a/cmake/core-files.cmake
+++ b/cmake/core-files.cmake
@@ -372,6 +372,8 @@ set(MBGL_CORE_FILES
src/mbgl/text/quads.hpp
src/mbgl/text/shaping.cpp
src/mbgl/text/shaping.hpp
+ src/mbgl/text/bidi.cpp
+ src/mbgl/text/bidi.hpp
# tile
src/mbgl/tile/geojson_tile.cpp
diff --git a/cmake/core.cmake b/cmake/core.cmake
index bc5e7eb718..9c1bf4a6f2 100644
--- a/cmake/core.cmake
+++ b/cmake/core.cmake
@@ -45,6 +45,7 @@ target_add_mason_package(mbgl-core PRIVATE supercluster)
target_add_mason_package(mbgl-core PRIVATE kdbush)
target_add_mason_package(mbgl-core PRIVATE earcut)
target_add_mason_package(mbgl-core PRIVATE protozero)
+target_add_mason_package(mbgl-core PRIVATE icu)
mbgl_platform_core()
diff --git a/platform/default/mbgl/storage/offline_download.cpp b/platform/default/mbgl/storage/offline_download.cpp
index 9e2e11c86f..3edc75845c 100644
--- a/platform/default/mbgl/storage/offline_download.cpp
+++ b/platform/default/mbgl/storage/offline_download.cpp
@@ -184,7 +184,7 @@ void OfflineDownload::activateDownload() {
if (!parser.glyphURL.empty()) {
for (const auto& fontStack : parser.fontStacks()) {
- for (uint32_t i = 0; i < GLYPH_RANGES_PER_FONT_STACK; i++) {
+ for (char16_t i = 0; i < GLYPH_RANGES_PER_FONT_STACK; i++) {
queueResource(Resource::glyphs(parser.glyphURL, fontStack, getGlyphRange(i * GLYPHS_PER_GLYPH_RANGE)));
}
}
diff --git a/platform/default/string_stdlib.cpp b/platform/default/string_stdlib.cpp
index 90a75c1738..0e97fc54d5 100644
--- a/platform/default/string_stdlib.cpp
+++ b/platform/default/string_stdlib.cpp
@@ -1,10 +1,10 @@
#include <mbgl/platform/platform.hpp>
-#include <mbgl/util/utf.hpp>
#define NU_WITH_TOUPPER
#define NU_WITH_TOLOWER
#define NU_WITH_UTF8_WRITER
#include <libnu/libnu.h>
#include <cstring>
+#include <sstream>
namespace mbgl { namespace platform {
diff --git a/src/mbgl/layout/merge_lines.cpp b/src/mbgl/layout/merge_lines.cpp
index f4fdb82617..676cbc092d 100644
--- a/src/mbgl/layout/merge_lines.cpp
+++ b/src/mbgl/layout/merge_lines.cpp
@@ -47,10 +47,10 @@ enum class Side {
};
size_t
-getKey(const std::u32string& text, const GeometryCollection& geom, Side side) {
+getKey(const std::u16string& text, const GeometryCollection& geom, Side side) {
const GeometryCoordinate& coord = side == Side::Right ? geom[0].back() : geom[0].front();
- auto hash = std::hash<std::u32string>()(text);
+ auto hash = std::hash<std::u16string>()(text);
boost::hash_combine(hash, coord.x);
boost::hash_combine(hash, coord.y);
return hash;
diff --git a/src/mbgl/layout/symbol_feature.hpp b/src/mbgl/layout/symbol_feature.hpp
index 99db4f9ac5..9e0eacaac5 100644
--- a/src/mbgl/layout/symbol_feature.hpp
+++ b/src/mbgl/layout/symbol_feature.hpp
@@ -10,7 +10,7 @@ namespace mbgl {
+// Plain data holder for one symbol feature: its geometry, optional label text, optional
+// icon name and an index.
class SymbolFeature {
public:
GeometryCollection geometry;
- optional<std::u32string> text;
+ // Label text as UTF-16 code units.
+ // NOTE(review): presumably filled by the SymbolLayout constructor (`ft.text = ...`) with
+ // the Arabic-shaped text still in logical order -- confirm that `ft` is a SymbolFeature.
+ optional<std::u16string> text;
optional<std::string> icon;
std::size_t index;
};
diff --git a/src/mbgl/layout/symbol_layout.cpp b/src/mbgl/layout/symbol_layout.cpp
index 07ba2bf4a3..94dc1d126e 100644
--- a/src/mbgl/layout/symbol_layout.cpp
+++ b/src/mbgl/layout/symbol_layout.cpp
@@ -90,10 +90,10 @@ SymbolLayout::SymbolLayout(std::string bucketName_,
u8string = platform::lowercase(u8string);
}
- ft.text = util::utf8_to_utf32::convert(u8string);
+ ft.text = applyArabicShaping(util::utf8_to_utf16::convert(u8string));
// Loop through all characters of this text and collect unique codepoints.
- for (char32_t chr : *ft.text) {
+ for (char16_t chr : *ft.text) {
ranges.insert(getGlyphRange(chr));
}
}
@@ -201,7 +201,8 @@ void SymbolLayout::prepare(uintptr_t tileUID,
/* verticalAlign */ verticalAlign,
/* justify */ justify,
/* spacing: ems */ layout.textLetterSpacing * 24,
- /* translate */ Point<float>(layout.textOffset.value[0], layout.textOffset.value[1]));
+ /* translate */ Point<float>(layout.textOffset.value[0], layout.textOffset.value[1]),
+ /* bidirectional algorithm object */ bidi);
// Add the glyphs we need for this label to the glyph atlas.
if (shapedText) {
@@ -309,7 +310,7 @@ void SymbolLayout::addFeature(const GeometryCollection &lines,
}
}
-bool SymbolLayout::anchorIsTooClose(const std::u32string &text, const float repeatDistance, Anchor &anchor) {
+bool SymbolLayout::anchorIsTooClose(const std::u16string &text, const float repeatDistance, Anchor &anchor) {
if (compareText.find(text) == compareText.end()) {
compareText.emplace(text, Anchors());
} else {
diff --git a/src/mbgl/layout/symbol_layout.hpp b/src/mbgl/layout/symbol_layout.hpp
index 54acf84aaf..c21398fabf 100644
--- a/src/mbgl/layout/symbol_layout.hpp
+++ b/src/mbgl/layout/symbol_layout.hpp
@@ -4,6 +4,7 @@
#include <mbgl/style/layers/symbol_layer_properties.hpp>
#include <mbgl/layout/symbol_feature.hpp>
#include <mbgl/layout/symbol_instance.hpp>
+#include <mbgl/text/bidi.hpp>
#include <memory>
#include <map>
@@ -64,8 +65,8 @@ private:
const GlyphPositions& face,
const size_t index);
- bool anchorIsTooClose(const std::u32string& text, const float repeatDistance, Anchor&);
- std::map<std::u32string, std::vector<Anchor>> compareText;
+ bool anchorIsTooClose(const std::u16string& text, const float repeatDistance, Anchor&);
+ std::map<std::u16string, std::vector<Anchor>> compareText;
void addToDebugBuffers(CollisionTile&, SymbolBucket&);
@@ -91,6 +92,8 @@ private:
GlyphRangeSet ranges;
std::vector<SymbolInstance> symbolInstances;
std::vector<SymbolFeature> features;
+
+ BiDi bidi; // Consider moving this up to geometry tile worker to reduce reinstantiation costs; use of BiDi/ubiditransform object must be constrained to one thread
};
} // namespace mbgl
diff --git a/src/mbgl/text/bidi.cpp b/src/mbgl/text/bidi.cpp
new file mode 100644
index 0000000000..93a778b334
--- /dev/null
+++ b/src/mbgl/text/bidi.cpp
@@ -0,0 +1,127 @@
+#include <memory>
+
+#include <mbgl/text/bidi.hpp>
+#include <unicode/ubidi.h>
+#include <unicode/ushape.h>
+
+namespace mbgl {
+
+    // Owns the two ICU UBiDi handles that BiDi works with. `bidiText` is the handle given to
+    // ubidi_setPara / ubidi_countParagraphs / ubidi_getParagraphByIndex for the whole input;
+    // `bidiLine` is the handle that ubidi_setLine fills in for one line at a time.
+    // Both handles are opened in the constructor and closed in the destructor.
+    class BiDiImpl {
+    public:
+        BiDiImpl() : bidiText(ubidi_open()), bidiLine(ubidi_open()) {
+        }
+        ~BiDiImpl() {
+            ubidi_close(bidiText);
+            ubidi_close(bidiLine);
+        }
+
+        // The destructor closes both handles, so a copied BiDiImpl would close them a second
+        // time. Copying is therefore disabled.
+        BiDiImpl(const BiDiImpl&) = delete;
+        BiDiImpl& operator=(const BiDiImpl&) = delete;
+
+        UBiDi* bidiText = nullptr;
+        UBiDi* bidiLine = nullptr;
+    };
+
+ // Allocates the BiDiImpl that holds the ICU handles used by this object.
+ BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {}
+ // Defaulted here, after BiDiImpl has been fully defined in this file, so the destruction of
+ // `impl` is compiled where BiDiImpl is a complete type.
+ BiDi::~BiDi() = default;
+
+    // Applies ICU Arabic letter shaping (u_shapeArabic) to UTF-16 text that is in logical
+    // order and returns the shaped text, still in logical order. The result is not meant to
+    // be displayed until the bidirectional algorithm has been applied to it.
+    // If the shaping call reports a failure, the input is returned unchanged.
+    std::u16string applyArabicShaping(const std::u16string& input) {
+        const auto sourceLength = static_cast<int32_t>(input.size());
+        const auto shapingOptions =
+            (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
+            (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK);
+
+        // First pass without a destination buffer, only to learn the required length.
+        // Pre-flighting always sets U_BUFFER_OVERFLOW_ERROR, so this status is not inspected.
+        UErrorCode preflightStatus = U_ZERO_ERROR;
+        const int32_t shapedLength = u_shapeArabic(input.c_str(), sourceLength, NULL, 0,
+                                                   shapingOptions, &preflightStatus);
+
+        // Second pass writes into a buffer of exactly the pre-flighted size.
+        UErrorCode shapingStatus = U_ZERO_ERROR;
+        auto shapedText = std::make_unique<UChar[]>(shapedLength);
+        u_shapeArabic(input.c_str(), sourceLength, shapedText.get(), shapedLength,
+                      shapingOptions, &shapingStatus);
+
+        if (U_SUCCESS(shapingStatus)) {
+            return std::u16string(shapedText.get(), shapedLength);
+        }
+
+        // Shaping failed for some reason: fall back to the non-transformed text
+        return input;
+    }
+
+    // Inserts the end index of every paragraph ICU found in the text most recently handed to
+    // ubidi_setPara (see processText) into `lineBreakPoints`.
+    // Throws std::runtime_error, carrying ICU's error name, if a paragraph cannot be queried.
+    void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) {
+        const int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText);
+        for (int32_t i = 0; i < paragraphCount; i++) {
+            UErrorCode errorCode = U_ZERO_ERROR;
+            int32_t paragraphEndIndex = 0;
+            // Only the paragraph limit is needed; the start and level outputs are skipped.
+            ubidi_getParagraphByIndex(impl->bidiText, i, nullptr, &paragraphEndIndex, nullptr,
+                                      &errorCode);
+
+            if (U_FAILURE(errorCode)) {
+                // Prefix matches the other BiDi:: error messages in this file.
+                throw std::runtime_error(std::string("BiDi::mergeParagraphLineBreaks: ") +
+                                         u_errorName(errorCode));
+            }
+
+            lineBreakPoints.insert(static_cast<std::size_t>(paragraphEndIndex));
+        }
+    }
+
+    // Cuts the text currently loaded into the ICU paragraph handle into lines and returns
+    // them in order. Each line runs from the previous break point (0 for the first line) to
+    // the next break point, and is produced by getLine. `lineBreakPoints` is taken by value
+    // because paragraph boundaries are merged into it before it is used.
+    std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
+        // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all
+        // paragraph boundaries are included in the set of line break points. The calling code might not
+        // include the line break because it didn't need to wrap at that point, or because the text was
+        // separated with a more exotic code point such as (U+001C)
+        mergeParagraphLineBreaks(lineBreakPoints);
+
+        std::vector<std::u16string> transformedLines;
+        // One line is produced per break point, so the final size is known up front.
+        transformedLines.reserve(lineBreakPoints.size());
+
+        std::size_t start = 0;
+        for (const std::size_t lineBreakPoint : lineBreakPoints) {
+            transformedLines.push_back(getLine(start, lineBreakPoint));
+            start = lineBreakPoint;
+        }
+
+        return transformedLines;
+    }
+
+    // Loads `input` (UTF-16, logical order) into the ICU paragraph handle with
+    // ubidi_setPara, using UBIDI_DEFAULT_LTR as the paragraph level, and returns the lines
+    // produced by applyLineBreaking for `lineBreakPoints`.
+    // Throws std::runtime_error, carrying ICU's error name, if ubidi_setPara fails.
+    std::vector<std::u16string> BiDi::processText(const std::u16string& input,
+                                                  std::set<std::size_t> lineBreakPoints) {
+        const auto inputLength = static_cast<int32_t>(input.size());
+
+        UErrorCode status = U_ZERO_ERROR;
+        ubidi_setPara(impl->bidiText, input.c_str(), inputLength, UBIDI_DEFAULT_LTR, NULL,
+                      &status);
+
+        if (!U_FAILURE(status)) {
+            return applyLineBreaking(lineBreakPoints);
+        }
+
+        throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(status));
+    }
+
+    // Returns the text between `start` and `end` of the paragraph currently loaded into the
+    // ICU paragraph handle, reordered by ubidi_writeReordered with mirroring applied and
+    // bidi control characters removed.
+    // Throws std::runtime_error, carrying ICU's error name, if ubidi_setLine or
+    // ubidi_writeReordered fails.
+    std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
+        UErrorCode errorCode = U_ZERO_ERROR;
+        ubidi_setLine(impl->bidiText, static_cast<int32_t>(start), static_cast<int32_t>(end),
+                      impl->bidiLine, &errorCode);
+
+        if (U_FAILURE(errorCode)) {
+            throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode));
+        }
+
+        // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve
+        // Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require
+        // ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)
+        const int32_t outputCapacity = ubidi_getProcessedLength(impl->bidiLine);
+        auto outputText = std::make_unique<UChar[]>(outputCapacity);
+
+        // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
+        // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
+        // they don't show up on screen (some fonts have glyphs representing them)
+        const int32_t writtenLength =
+            ubidi_writeReordered(impl->bidiLine, outputText.get(), outputCapacity,
+                                 UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode);
+
+        if (U_FAILURE(errorCode)) {
+            throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") +
+                                     u_errorName(errorCode));
+        }
+
+        // Build the result from the length ICU actually wrote, not from the buffer capacity.
+        // When control characters were removed the written text is shorter than the buffer,
+        // and using the capacity would append the buffer's zero padding to the line.
+        return std::u16string(outputText.get(), writtenLength);
+    }
+
+} // end namespace mbgl
diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp
new file mode 100644
index 0000000000..59d306489c
--- /dev/null
+++ b/src/mbgl/text/bidi.hpp
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <set>
+#include <string>
+#include <vector>
+#include <memory>
+
+#include <mbgl/util/noncopyable.hpp>
+
+namespace mbgl {
+
+class BiDi;
+class BiDiImpl;
+
+std::u16string applyArabicShaping(const std::u16string&);
+
+// Entry point for bidirectional text processing. The ICU state lives in BiDiImpl, which this
+// header only forward-declares. Instances cannot be copied (util::noncopyable).
+class BiDi : private util::noncopyable {
+public:
+ BiDi();
+ ~BiDi();
+
+ // Takes UTF-16 text in logical order plus a set of line break offsets into that text and
+ // returns one reordered string per line. Paragraph boundaries found in the text are added
+ // to the break offsets. Throws std::runtime_error if an ICU call reports a failure.
+ std::vector<std::u16string> processText(const std::u16string&, std::set<std::size_t>);
+
+private:
+ // Inserts the end offset of every paragraph of the current text into the given set.
+ void mergeParagraphLineBreaks(std::set<std::size_t>&);
+ // Splits the current text at the given offsets (after merging in paragraph boundaries).
+ std::vector<std::u16string> applyLineBreaking(std::set<std::size_t>);
+ // Returns the reordered text of the line between offsets `start` and `end`.
+ std::u16string getLine(std::size_t start, std::size_t end);
+
+ std::unique_ptr<BiDiImpl> impl;
+};
+
+} // end namespace mbgl
diff --git a/src/mbgl/text/glyph.cpp b/src/mbgl/text/glyph.cpp
index a877d7a799..29929b73e6 100644
--- a/src/mbgl/text/glyph.cpp
+++ b/src/mbgl/text/glyph.cpp
@@ -3,7 +3,7 @@
namespace mbgl {
// Note: this only works for the BMP
-GlyphRange getGlyphRange(char32_t glyph) {
+GlyphRange getGlyphRange(char16_t glyph) {
unsigned start = (glyph/256) * 256;
unsigned end = (start + 255);
if (start > 65280) start = 65280;
diff --git a/src/mbgl/text/glyph.hpp b/src/mbgl/text/glyph.hpp
index 975dc4ad23..a91e1f276a 100644
--- a/src/mbgl/text/glyph.hpp
+++ b/src/mbgl/text/glyph.hpp
@@ -11,7 +11,7 @@
namespace mbgl {
// Note: this only works for the BMP
-GlyphRange getGlyphRange(char32_t glyph);
+GlyphRange getGlyphRange(char16_t glyph);
struct GlyphMetrics {
operator bool() const {
@@ -55,10 +55,10 @@ public:
class Shaping {
public:
explicit Shaping() : top(0), bottom(0), left(0), right(0) {}
- explicit Shaping(float x, float y, std::u32string text_)
+ explicit Shaping(float x, float y, std::u16string text_)
: text(std::move(text_)), top(y), bottom(y), left(x), right(x) {}
std::vector<PositionedGlyph> positionedGlyphs;
- std::u32string text;
+ std::u16string text;
int32_t top;
int32_t bottom;
int32_t left;
diff --git a/src/mbgl/text/glyph_atlas.cpp b/src/mbgl/text/glyph_atlas.cpp
index 2f8c44db59..5377284db2 100644
--- a/src/mbgl/text/glyph_atlas.cpp
+++ b/src/mbgl/text/glyph_atlas.cpp
@@ -84,7 +84,7 @@ void GlyphAtlas::setObserver(GlyphAtlasObserver* observer_) {
}
void GlyphAtlas::addGlyphs(uintptr_t tileUID,
- const std::u32string& text,
+ const std::u16string& text,
const FontStack& fontStack,
const GlyphSet& glyphSet,
GlyphPositions& face)
@@ -93,7 +93,7 @@ void GlyphAtlas::addGlyphs(uintptr_t tileUID,
const std::map<uint32_t, SDFGlyph>& sdfs = glyphSet.getSDFs();
- for (uint32_t chr : text)
+ for (char16_t chr : text)
{
auto sdf_it = sdfs.find(chr);
if (sdf_it == sdfs.end()) {
diff --git a/src/mbgl/text/glyph_atlas.hpp b/src/mbgl/text/glyph_atlas.hpp
index 84875bdd78..2bfc137e53 100644
--- a/src/mbgl/text/glyph_atlas.hpp
+++ b/src/mbgl/text/glyph_atlas.hpp
@@ -53,7 +53,7 @@ public:
void setObserver(GlyphAtlasObserver* observer);
void addGlyphs(uintptr_t tileUID,
- const std::u32string& text,
+ const std::u16string& text,
const FontStack&,
const GlyphSet&,
GlyphPositions&);
diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp
index 0875a83850..a4b197944e 100644
--- a/src/mbgl/text/glyph_set.cpp
+++ b/src/mbgl/text/glyph_set.cpp
@@ -1,7 +1,10 @@
+#include <mbgl/math/minmax.hpp>
#include <mbgl/text/glyph_set.hpp>
#include <mbgl/platform/log.hpp>
-#include <mbgl/math/minmax.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include <algorithm>
#include <cassert>
namespace mbgl {
@@ -26,44 +29,46 @@ void GlyphSet::insert(uint32_t id, SDFGlyph&& glyph) {
}
}
-const std::map<uint32_t, SDFGlyph> &GlyphSet::getSDFs() const {
+const std::map<uint32_t, SDFGlyph>& GlyphSet::getSDFs() const {
return sdfs;
}
-const Shaping GlyphSet::getShaping(const std::u32string &string, const float maxWidth,
- const float lineHeight, const float horizontalAlign,
- const float verticalAlign, const float justify,
- const float spacing, const Point<float> &translate) const {
- Shaping shaping(translate.x * 24, translate.y * 24, string);
-
- // the y offset *should* be part of the font metadata
- const int32_t yOffset = -17;
-
- float x = 0;
- const float y = yOffset;
+// Builds the Shaping for one label. Line breaks are chosen on the logical-order input
+// (determineLineBreaks), `bidi` turns the input and those breaks into per-line strings
+// (BiDi::processText), and shapeLines positions the glyphs of each line.
+const Shaping GlyphSet::getShaping(const std::u16string& logicalInput,
+ const float maxWidth,
+ const float lineHeight,
+ const float horizontalAlign,
+ const float verticalAlign,
+ const float justify,
+ const float spacing,
+ const Point<float>& translate,
+ BiDi& bidi) const {
- // Loop through all characters of this label and shape.
- for (uint32_t chr : string) {
- auto it = sdfs.find(chr);
- if (it != sdfs.end()) {
- shaping.positionedGlyphs.emplace_back(chr, x, y);
- x += it->second.metrics.advance + spacing;
- }
- }
+ // The string stored in shaping.text is used for finding duplicates, but may end up quite
+ // different from the glyphs that get shown
+ Shaping shaping(translate.x * 24, translate.y * 24, logicalInput);
- if (shaping.positionedGlyphs.empty())
- return shaping;
+ // Break points are computed on the logical-order text; processText returns one string
+ // per resulting line.
+ std::vector<std::u16string> reorderedLines =
+ bidi.processText(logicalInput,
+ determineLineBreaks(logicalInput, spacing, maxWidth));
- lineWrap(shaping, lineHeight, maxWidth, horizontalAlign, verticalAlign, justify, translate);
+ shapeLines(shaping, reorderedLines, spacing, lineHeight, horizontalAlign, verticalAlign,
+ justify, translate);
return shaping;
}
-void align(Shaping &shaping, const float justify, const float horizontalAlign,
- const float verticalAlign, const uint32_t maxLineLength, const float lineHeight,
- const uint32_t line, const Point<float> &translate) {
- const float shiftX = (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24/* one em */);
- const float shiftY = (-verticalAlign * (line + 1) + 0.5) * lineHeight + ::round(translate.y * 24/* one em */);
+void align(Shaping& shaping,
+ const float justify,
+ const float horizontalAlign,
+ const float verticalAlign,
+ const float maxLineLength,
+ const float lineHeight,
+ const std::size_t lineCount,
+ const Point<float>& translate) {
+ const float shiftX =
+ (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */);
+ const float shiftY =
+ (-verticalAlign * lineCount + 0.5) * lineHeight + ::round(translate.y * 24 /* one em */);
for (auto& glyph : shaping.positionedGlyphs) {
glyph.x += shiftX;
@@ -71,94 +76,225 @@ void align(Shaping &shaping, const float justify, const float horizontalAlign,
}
}
-void justifyLine(std::vector<PositionedGlyph> &positionedGlyphs, const std::map<uint32_t, SDFGlyph> &sdfs, uint32_t start,
- uint32_t end, float justify) {
- PositionedGlyph &glyph = positionedGlyphs[end];
+// Shifts the glyphs positionedGlyphs[start..end] (inclusive) to the left by
+// (x of the glyph at `end` + that glyph's advance) * justify.
+// Does nothing when justify is 0 or when the glyph at `end` has no entry in sdfs.
+// justify left = 0, right = 1, center = .5
+void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs,
+ const std::map<uint32_t, SDFGlyph>& sdfs,
+ std::size_t start,
+ std::size_t end,
+ float justify) {
+ if (!justify) {
+ return;
+ }
+
+ // The last glyph of the line determines the line's right edge.
+ PositionedGlyph& glyph = positionedGlyphs[end];
auto it = sdfs.find(glyph.glyph);
if (it != sdfs.end()) {
const uint32_t lastAdvance = it->second.metrics.advance;
const float lineIndent = float(glyph.x + lastAdvance) * justify;
- for (uint32_t j = start; j <= end; j++) {
+ for (std::size_t j = start; j <= end; j++) {
positionedGlyphs[j].x -= lineIndent;
}
}
}
-void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, const float maxWidth,
- const float horizontalAlign, const float verticalAlign,
- const float justify, const Point<float> &translate) const {
- uint32_t lastSafeBreak = 0;
+// Returns the width each line would have if the text were split evenly over the smallest
+// number of lines needed to stay within maxWidth. The total width is the sum of
+// (advance + spacing) over every code unit of logicalInput that has an entry in sdfs.
+// maxWidth is used as a divisor; the caller visible in this diff (determineLineBreaks)
+// returns early when maxWidth is 0.
+float GlyphSet::determineAverageLineWidth(const std::u16string& logicalInput,
+ const float spacing,
+ float maxWidth) const {
+ float totalWidth = 0;
- uint32_t lengthBeforeCurrentLine = 0;
- uint32_t lineStartIndex = 0;
- uint32_t line = 0;
+ for (char16_t chr : logicalInput) {
+ auto it = sdfs.find(chr);
+ if (it != sdfs.end()) {
+ totalWidth += it->second.metrics.advance + spacing;
+ }
+ }
- uint32_t maxLineLength = 0;
+ // At least one line, even when totalWidth is 0.
+ int32_t targetLineCount = std::fmax(1, std::ceil(totalWidth / maxWidth));
+ return totalWidth / targetLineCount;
+}
+
+// Scores a candidate line of width `lineWidth` against `targetWidth`; lower is better.
+// The base score ("raggedness") is the squared difference between the two widths.
+//  - Last line of the text: the penalty is ignored; raggedness is halved when the line is
+//    shorter than the target and doubled otherwise.
+//  - Any other line: the squared penalty is added, or subtracted when the penalty is
+//    negative (a negative penalty therefore makes the break more attractive).
+float calculateBadness(const float lineWidth, const float targetWidth, const float penalty, const bool isLastBreak) {
+    const float raggedness = std::pow(lineWidth - targetWidth, 2);
+
+    if (!isLastBreak) {
+        const auto penaltySquared = std::pow(penalty, 2);
+        return penalty < 0 ? raggedness - penaltySquared : raggedness + penaltySquared;
+    }
+
+    // Favor final lines shorter than average over longer than average
+    return lineWidth < targetWidth ? raggedness / 2 : raggedness * 2;
+}
+
+// Returns the penalty for breaking the line between `codePoint` and `nextCodePoint`.
+// A negative result encourages the break, a positive one discourages it; 0 is neutral.
+// NOTE(review): the only call site visible in this diff (determineLineBreaks) calls this
+// solely for its listed break characters, which include neither 0x0a nor 0x28/0xff08, so
+// the first two branches look unreachable from there -- confirm.
+float calculatePenalty(char16_t codePoint, char16_t nextCodePoint) {
+ float penalty = 0;
+ // Force break on newline
+ if (codePoint == 0x0a) {
+ penalty -= 10000;
+ }
+ // Penalize open parenthesis at end of line
+ // (0x28 is '(', 0xff08 is the fullwidth left parenthesis)
+ if (codePoint == 0x28 || codePoint == 0xff08) {
+ penalty += 50;
+ }
- std::vector<PositionedGlyph> &positionedGlyphs = shaping.positionedGlyphs;
+ // Penalize close parenthesis at beginning of line
+ // (0x29 is ')', 0xff09 is the fullwidth right parenthesis)
+ if (nextCodePoint == 0x29 || nextCodePoint == 0xff09) {
+ penalty += 50;
+ }
+
+ return penalty;
+}
+
+// One candidate line break, together with the best way found so far of reaching it.
+struct PotentialBreak {
+    // Index in the logical input at which the next line would start.
+    const std::size_t index;
+    // Accumulated width of the text up to this break.
+    const float x;
+    // Break that ends the previous line on the best path, or nullptr when the line ending
+    // here starts at the beginning of the text.
+    const PotentialBreak* priorBreak;
+    // Total badness of the best path ending at this break.
+    const float badness;
+
+    PotentialBreak(const std::size_t breakIndex,
+                   const float breakX,
+                   const PotentialBreak* bestPrior,
+                   const float totalBadness)
+        : index(breakIndex), x(breakX), priorBreak(bestPrior), badness(totalBadness) {
+    }
+};
+
+
+// Builds the PotentialBreak for a break at `breakIndex` (accumulated width `breakX`) by
+// choosing, among "no earlier break" and every entry of `potentialBreaks`, the predecessor
+// that gives the lowest total badness. On a tie the later candidate wins. The returned
+// object points at the chosen element of `potentialBreaks`, or holds nullptr when starting
+// from the beginning of the text is best.
+PotentialBreak evaluateBreak(const std::size_t breakIndex, const float breakX, const float targetWidth, const std::list<PotentialBreak>& potentialBreaks, const float penalty, const bool isLastBreak) {
+    // We could skip evaluating breaks where the line length (breakX - priorBreak.x) > maxWidth
+    // ...but in fact we allow lines longer than maxWidth (if there's no break points)
+    // ...and when targetWidth and maxWidth are close, strictly enforcing maxWidth can give
+    // more lopsided results.
+
+    // Baseline: no earlier break, so the line spans everything from the start of the text.
+    float lowestBadness = calculateBadness(breakX, targetWidth, penalty, isLastBreak);
+    const PotentialBreak* chosenPrior = nullptr;
+
+    for (auto candidate = potentialBreaks.begin(); candidate != potentialBreaks.end(); ++candidate) {
+        const float widthSinceCandidate = breakX - candidate->x;
+        const float totalBadness =
+            calculateBadness(widthSinceCandidate, targetWidth, penalty, isLastBreak) +
+            candidate->badness;
+        if (totalBadness <= lowestBadness) {
+            chosenPrior = &*candidate;
+            lowestBadness = totalBadness;
+        }
+    }
+
+    return PotentialBreak(breakIndex, breakX, chosenPrior, lowestBadness);
+}
+
+// Collects the break indices on the best path that ends at `lastLineBreak`: its own index
+// plus the index of every break reached by following the priorBreak links.
+std::set<std::size_t> leastBadBreaks(const PotentialBreak& lastLineBreak) {
+    std::set<std::size_t> breakIndices;
+    for (const PotentialBreak* current = &lastLineBreak; current != nullptr;
+         current = current->priorBreak) {
+        breakIndices.insert(current->index);
+    }
+    return breakIndices;
+}
+
+
+// Chooses where to break logicalInput into lines. Returns a set of offsets into
+// logicalInput: each chosen break is the index just after a breakable character, and the
+// end of the text (logicalInput.size()) is always included. Returns an empty set when
+// maxWidth is 0 or the input is empty.
+// We determine line breaks based on shaped text in logical order. Working in visual order would be
+// more intuitive, but we can't do that because the visual order may be changed by line breaks!
+std::set<std::size_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput,
+ const float spacing,
+ float maxWidth) const {
+ if (!maxWidth) {
+ return {};
+ }
+
+ if (logicalInput.empty()) {
+ return {};
+ }
+
+ const float targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth);
+
+ // evaluateBreak keeps pointers to earlier elements of this container; std::list does not
+ // move existing elements when new ones are pushed back.
+ std::list<PotentialBreak> potentialBreaks;
+ float currentX = 0;
+
+ for (std::size_t i = 0; i < logicalInput.size(); i++) {
+ const char16_t codePoint = logicalInput[i];
+ auto it = sdfs.find(codePoint);
+ // Whitespace does not advance currentX, nor do code units without an entry in sdfs.
+ if (it != sdfs.end() && !boost::algorithm::is_any_of(u" \t\n\v\f\r")(codePoint)) {
+ currentX += it->second.metrics.advance + spacing;
+ }
+
+ // No candidate is recorded after the last character; the end of the text is handled
+ // by the final evaluateBreak call below.
+ if (i >= logicalInput.size() - 1)
+ continue;
+
+ // Spaces, plus word-breaking punctuation that often appears without surrounding spaces.
+ if (codePoint == 0x20 /* space */
+ || codePoint == 0x26 /* ampersand */
+ || codePoint == 0x2b /* plus sign */
+ || codePoint == 0x2d /* hyphen-minus */
+ || codePoint == 0x2f /* solidus */
+ || codePoint == 0xad /* soft hyphen */
+ || codePoint == 0xb7 /* middle dot */
+ || codePoint == 0x200b /* zero-width space */
+ || codePoint == 0x2010 /* hyphen */
+ || codePoint == 0x2013 /* en dash */) {
+ potentialBreaks.push_back(evaluateBreak(i+1, currentX, targetWidth, potentialBreaks,
+ calculatePenalty(codePoint, logicalInput[i+1]),
+ false));
+ }
+ }
- if (maxWidth) {
- for (uint32_t i = 0; i < positionedGlyphs.size(); i++) {
- PositionedGlyph &shape = positionedGlyphs[i];
+ // The end of the text is evaluated as the last break; following its priorBreak chain
+ // yields the chosen set of breaks.
+ return leastBadBreaks(evaluateBreak(logicalInput.size(), currentX, targetWidth, potentialBreaks, 0, true));
+}
- shape.x -= lengthBeforeCurrentLine;
- shape.y += lineHeight * line;
+void GlyphSet::shapeLines(Shaping& shaping,
+ const std::vector<std::u16string>& lines,
+ const float spacing,
+ const float lineHeight,
+ const float horizontalAlign,
+ const float verticalAlign,
+ const float justify,
+ const Point<float>& translate) const {
- if (shape.x > maxWidth && lastSafeBreak > 0) {
+ // the y offset *should* be part of the font metadata
+ const int32_t yOffset = -17;
- uint32_t lineLength = positionedGlyphs[lastSafeBreak + 1].x;
- maxLineLength = util::max(lineLength, maxLineLength);
+ float x = 0;
+ float y = yOffset;
- for (uint32_t k = lastSafeBreak + 1; k <= i; k++) {
- positionedGlyphs[k].y += lineHeight;
- positionedGlyphs[k].x -= lineLength;
- }
+ float maxLineLength = 0;
- if (justify) {
- // Collapse invisible characters.
- uint32_t breakGlyph = positionedGlyphs[lastSafeBreak].glyph;
- uint32_t lineEnd = lastSafeBreak;
- if (breakGlyph == 0x20 /* space */
- || breakGlyph == 0x200b /* zero-width space */) {
- lineEnd--;
- }
+ for (std::u16string line : lines) {
+ // Collapse whitespace so it doesn't throw off justification
+ boost::algorithm::trim_if(line, boost::algorithm::is_any_of(u" \t\n\v\f\r"));
- justifyLine(positionedGlyphs, sdfs, lineStartIndex, lineEnd, justify);
- }
+ if (line.empty()) {
+ y += lineHeight; // Still need a line feed after empty line
+ continue;
+ }
- lineStartIndex = lastSafeBreak + 1;
- lastSafeBreak = 0;
- lengthBeforeCurrentLine += lineLength;
- line++;
+ std::size_t lineStartIndex = shaping.positionedGlyphs.size();
+ for (char16_t chr : line) {
+ auto it = sdfs.find(chr);
+ if (it == sdfs.end()) {
+ continue;
}
- // Spaces, plus word-breaking punctuation that often appears without surrounding spaces.
- if (shape.glyph == 0x20 /* space */
- || shape.glyph == 0x26 /* ampersand */
- || shape.glyph == 0x2b /* plus sign */
- || shape.glyph == 0x2d /* hyphen-minus */
- || shape.glyph == 0x2f /* solidus */
- || shape.glyph == 0xad /* soft hyphen */
- || shape.glyph == 0xb7 /* middle dot */
- || shape.glyph == 0x200b /* zero-width space */
- || shape.glyph == 0x2010 /* hyphen */
- || shape.glyph == 0x2013 /* en dash */) {
- lastSafeBreak = i;
- }
+ const SDFGlyph& glyph = it->second;
+ shaping.positionedGlyphs.emplace_back(chr, x, y);
+ x += glyph.metrics.advance + spacing;
}
- }
- const PositionedGlyph& lastPositionedGlyph = positionedGlyphs.back();
- const auto lastGlyphIt = sdfs.find(lastPositionedGlyph.glyph);
- assert(lastGlyphIt != sdfs.end());
- const uint32_t lastLineLength = lastPositionedGlyph.x + lastGlyphIt->second.metrics.advance;
- maxLineLength = std::max(maxLineLength, lastLineLength);
+ // Only justify if we placed at least one glyph
+ if (shaping.positionedGlyphs.size() != lineStartIndex) {
+ float lineLength = x - spacing; // Don't count trailing spacing
+ maxLineLength = util::max(lineLength, maxLineLength);
+
+ justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex,
+ shaping.positionedGlyphs.size() - 1, justify);
+ }
- const uint32_t height = (line + 1) * lineHeight;
+ x = 0;
+ y += lineHeight;
+ }
- justifyLine(positionedGlyphs, sdfs, lineStartIndex, uint32_t(positionedGlyphs.size()) - 1, justify);
- align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, line, translate);
+ align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight,
+ lines.size(), translate);
+ const uint32_t height = lines.size() * lineHeight;
// Calculate the bounding box
shaping.top += -verticalAlign * height;
diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp
index 37ffdb070a..3037cefca0 100644
--- a/src/mbgl/text/glyph_set.hpp
+++ b/src/mbgl/text/glyph_set.hpp
@@ -1,5 +1,6 @@
#pragma once
+#include <mbgl/text/bidi.hpp>
#include <mbgl/text/glyph.hpp>
#include <mbgl/util/geometry.hpp>
@@ -8,14 +9,34 @@ namespace mbgl {
+// Holds the SDF glyphs of one glyph set, keyed by glyph id, and shapes label text with them.
class GlyphSet {
public:
void insert(uint32_t id, SDFGlyph&&);
- const std::map<uint32_t, SDFGlyph> &getSDFs() const;
- const Shaping getShaping(const std::u32string &string, float maxWidth, float lineHeight,
- float horizontalAlign, float verticalAlign, float justify,
- float spacing, const Point<float> &translate) const;
- void lineWrap(Shaping &shaping, float lineHeight, float maxWidth, float horizontalAlign,
- float verticalAlign, float justify, const Point<float> &translate) const;
+ const std::map<uint32_t, SDFGlyph>& getSDFs() const;
+ // Shapes `string` (UTF-16, logical order) into positioned glyphs. `bidi` is used to split
+ // the text into lines and reorder them.
+ const Shaping getShaping(const std::u16string& string,
+ float maxWidth,
+ float lineHeight,
+ float horizontalAlign,
+ float verticalAlign,
+ float justify,
+ float spacing,
+ const Point<float>& translate,
+ BiDi& bidi) const;
private:
+ // Total width of the text divided by the number of lines needed to fit maxWidth.
+ float determineAverageLineWidth(const std::u16string& logicalInput,
+ const float spacing,
+ float maxWidth) const;
+ // Offsets into logicalInput at which lines should end.
+ std::set<std::size_t> determineLineBreaks(const std::u16string& logicalInput,
+ const float spacing,
+ float maxWidth) const;
+
+ // Positions the glyphs of each line into `shaping`.
+ void shapeLines(Shaping& shaping,
+ const std::vector<std::u16string>& lines,
+ const float spacing,
+ float lineHeight,
+ float horizontalAlign,
+ float verticalAlign,
+ float justify,
+ const Point<float>& translate) const;
+
std::map<uint32_t, SDFGlyph> sdfs;
};
diff --git a/src/mbgl/util/utf.hpp b/src/mbgl/util/utf.hpp
index 560ca3ba7f..81330cfc83 100644
--- a/src/mbgl/util/utf.hpp
+++ b/src/mbgl/util/utf.hpp
@@ -2,18 +2,17 @@
#include <memory>
-#include <boost/regex/pending/unicode_iterator.hpp>
+#include <locale>
+#include <codecvt>
namespace mbgl {
namespace util {
-class utf8_to_utf32 {
- public:
- static std::u32string convert(std::string const& utf8)
- {
- boost::u8_to_u32_iterator<std::string::const_iterator> begin(utf8.begin());
- boost::u8_to_u32_iterator<std::string::const_iterator> end(utf8.end());
- return std::u32string(begin,end);
+// Converts UTF-8 encoded std::string data to a UTF-16 std::u16string.
+class utf8_to_utf16 {
+public:
+ // Returns `utf8` re-encoded as UTF-16.
+ // NOTE(review): the converter is constructed without an error string, so from_bytes
+ // throws std::range_error when the input cannot be converted (e.g. malformed UTF-8).
+ // Nothing in this function catches it -- confirm callers are prepared for that.
+ static std::u16string convert(std::string const& utf8) {
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
+ return converter.from_bytes(utf8);
}
};
diff --git a/test/util/merge_lines.test.cpp b/test/util/merge_lines.test.cpp
index db81d8b209..30cd1af068 100644
--- a/test/util/merge_lines.test.cpp
+++ b/test/util/merge_lines.test.cpp
@@ -3,8 +3,8 @@
#include <mbgl/layout/merge_lines.hpp>
#include <mbgl/layout/symbol_feature.hpp>
-const std::u32string aaa = U"a";
-const std::u32string bbb = U"b";
+const std::u16string aaa = u"a";
+const std::u16string bbb = u"b";
TEST(MergeLines, SameText) {
// merges lines with the same text