From 5f29c3ba26f84c8aea6f70640c348b32cdf397bf Mon Sep 17 00:00:00 2001 From: Chris Loer Date: Sun, 20 Nov 2016 09:36:07 -0800 Subject: [core] Full support for line breaking bidirectional text using ICU bidi functionality. - Trim whitespace from labels before determining their max-width for alignment. - Fix crash on labels that contain lines with only a single character of whitespace. --- src/mbgl/text/bidi.cpp | 133 ++++++++++++++++++------ src/mbgl/text/bidi.hpp | 36 +++++-- src/mbgl/text/glyph_set.cpp | 248 +++++++++++++++++++++++++++----------------- src/mbgl/text/glyph_set.hpp | 33 ++++-- 4 files changed, 306 insertions(+), 144 deletions(-) (limited to 'src/mbgl/text') diff --git a/src/mbgl/text/bidi.cpp b/src/mbgl/text/bidi.cpp index 4c127e9cab..7d5f6313bc 100644 --- a/src/mbgl/text/bidi.cpp +++ b/src/mbgl/text/bidi.cpp @@ -2,55 +2,124 @@ #include #include -#include #include namespace mbgl { -BiDi::BiDi() { +// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining +// logical order +// Output won't be intelligible until the bidirectional algorithm is applied +std::u16string applyArabicShaping(const std::u16string& input) { UErrorCode errorCode = U_ZERO_ERROR; - transform = ubiditransform_open(&errorCode); // Only error is failure to allocate memory, in - // that case ubidi_transform would fall back to - // creating transform object on the fly + + int32_t outputLength = + u_shapeArabic(input.c_str(), static_cast(input.size()), NULL, 0, + (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | + (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), + &errorCode); + + // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR + errorCode = U_ZERO_ERROR; + + std::unique_ptr outputText = std::make_unique(outputLength); + u_shapeArabic(input.c_str(), static_cast(input.size()), outputText.get(), outputLength, + (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | + (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), + 
&errorCode); + + // If the algorithm fails for any reason, fall back to non-transformed text + if (U_FAILURE(errorCode)) + return input; + + return std::u16string(outputText.get(), outputLength); +} + +ProcessedBiDiText::ProcessedBiDiText(BiDi& p_bidi) : bidi(p_bidi) { +} + +void ProcessedBiDiText::mergeParagraphLineBreaks(std::set& lineBreakPoints) { + int32_t paragraphCount = ubidi_countParagraphs(bidi.bidiText); + for (int32_t i = 0; i < paragraphCount; i++) { + UErrorCode errorCode = U_ZERO_ERROR; + int32_t paragraphEndIndex; + ubidi_getParagraphByIndex(bidi.bidiText, i, NULL, ¶graphEndIndex, NULL, &errorCode); + + if (U_FAILURE(errorCode)) + throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") + + u_errorName(errorCode)); + + lineBreakPoints.insert(paragraphEndIndex); + } +} + +std::vector +ProcessedBiDiText::applyLineBreaking(std::set lineBreakPoints) { + // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all + // paragraph + // boundaries are included in the set of line break points. 
The calling code might not include + // the line break because it + // didn't need to wrap at that point, or because the text was separated with a more exotic code + // point such as (U+001C) + mergeParagraphLineBreaks(lineBreakPoints); + + std::vector transformedLines; + int32_t start = 0; + for (int32_t lineBreakPoint : lineBreakPoints) { + transformedLines.push_back(bidi.getLine(start, lineBreakPoint)); + start = lineBreakPoint; + } + + return transformedLines; +} + +BiDi::BiDi() { + bidiText = ubidi_open(); + bidiLine = ubidi_open(); } BiDi::~BiDi() { - if (transform) - ubiditransform_close(transform); + if (bidiText) + ubidi_close(bidiText); + + if (bidiLine) + ubidi_close(bidiLine); } -std::u16string BiDi::bidiTransform(const std::u16string& input) { +ProcessedBiDiText BiDi::processText(const std::u16string& input) { UErrorCode errorCode = U_ZERO_ERROR; - std::unique_ptr outputText = - std::make_unique(input.size() * 2); // Maximum output of ubidi_transform is twice - // the size of input according to - // ubidi_transform.h - uint32_t outputLength = ubiditransform_transform( - transform, input.c_str(), static_cast(input.size()), outputText.get(), - static_cast(input.size()) * 2, - UBIDI_DEFAULT_LTR, // Assume input is LTR unless strong RTL characters are found - UBIDI_LOGICAL, // Input is in logical order - UBIDI_LTR, // Output is in "visual LTR" order - UBIDI_VISUAL, // '' - UBIDI_MIRRORING_ON, // Use mirroring lookups for things like parentheses that need mirroring - // in RTL text - U_SHAPE_LETTERS_SHAPE, // Add options here for handling numbers in bidirectional text - &errorCode); + ubidi_setPara(bidiText, input.c_str(), static_cast(input.size()), UBIDI_DEFAULT_LTR, + NULL, &errorCode); - // If the algorithm fails for any reason, fall back to non-transformed text if (U_FAILURE(errorCode)) - return input; + throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(errorCode)); - return std::u16string(outputText.get(), outputLength); + 
return ProcessedBiDiText(*this); } -WritingDirection BiDi::baseWritingDirection(const std::u16string& input) { - // This just looks for the first character with a strong direction property, it does not perform - // the BiDi algorithm - return ubidi_getBaseDirection(input.c_str(), static_cast(input.size())) == UBIDI_RTL - ? WritingDirection::RightToLeft - : WritingDirection::LeftToRight; +std::u16string BiDi::getLine(int32_t start, int32_t end) { + UErrorCode errorCode = U_ZERO_ERROR; + ubidi_setLine(bidiText, start, end, bidiLine, &errorCode); + + if (U_FAILURE(errorCode)) + throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode)); + + // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve + // Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require + // ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi) + int32_t outputLength = ubidi_getProcessedLength(bidiLine); + std::unique_ptr outputText = std::make_unique(outputLength); + + // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses + // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that + // they don't show up on screen (some fonts have glyphs representing them) + ubidi_writeReordered(bidiLine, outputText.get(), outputLength, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode); + + if (U_FAILURE(errorCode)) + throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") + u_errorName(errorCode)); + + return std::u16string(outputText.get(), outputLength); } } // end namespace mbgl diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp index e29bf041e2..8cbe079e57 100644 --- a/src/mbgl/text/bidi.hpp +++ b/src/mbgl/text/bidi.hpp @@ -1,25 +1,47 @@ #pragma once +#include #include +#include #include -struct UBiDiTransform; +struct UBiDi; namespace mbgl { - -enum class WritingDirection : bool { LeftToRight, RightToLeft }; - + +class BiDi; + +std::u16string 
applyArabicShaping(const std::u16string&); + +class ProcessedBiDiText { +public: + ProcessedBiDiText(BiDi&); + + std::vector applyLineBreaking(std::set); + +private: + void mergeParagraphLineBreaks(std::set&); + + BiDi& bidi; +}; + class BiDi : private util::noncopyable { public: BiDi(); ~BiDi(); - std::u16string bidiTransform(const std::u16string&); - WritingDirection baseWritingDirection(const std::u16string&); + // Calling processText resets internal state, invalidating any existing ProcessedBiDiText + // objects + ProcessedBiDiText processText(const std::u16string&); + + friend class ProcessedBiDiText; private: - UBiDiTransform* transform; + std::u16string getLine(int32_t start, int32_t end); + + UBiDi* bidiText; + UBiDi* bidiLine; }; } // end namespace mbgl diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp index ef556aa537..f1cb85a03a 100644 --- a/src/mbgl/text/glyph_set.cpp +++ b/src/mbgl/text/glyph_set.cpp @@ -1,9 +1,11 @@ -#include -#include #include +#include #include -#include +#include + +#include +#include #include namespace mbgl { @@ -28,45 +30,47 @@ void GlyphSet::insert(uint32_t id, SDFGlyph&& glyph) { } } -const std::map &GlyphSet::getSDFs() const { +const std::map& GlyphSet::getSDFs() const { return sdfs; } -const Shaping GlyphSet::getShaping(const std::u16string &string, const WritingDirection writingDirection, const float maxWidth, - const float lineHeight, const float horizontalAlign, - const float verticalAlign, const float justify, - const float spacing, const Point &translate) const { - Shaping shaping(translate.x * 24, translate.y * 24, string); +const Shaping GlyphSet::getShaping(const std::u16string& logicalInput, + const float maxWidth, + const float lineHeight, + const float horizontalAlign, + const float verticalAlign, + const float justify, + const float spacing, + const Point& translate, + BiDi& bidi) const { - // the y offset *should* be part of the font metadata - const int32_t yOffset = -17; + // The string 
stored in shaping.text is used for finding duplicates, but may end up quite + // different from the glyphs that get shown + Shaping shaping(translate.x * 24, translate.y * 24, logicalInput); - float x = 0; - const float y = yOffset; + ProcessedBiDiText processedText = bidi.processText(logicalInput); - // Loop through all characters of this label and shape. - for (char16_t chr : string) { - auto it = sdfs.find(chr); - if (it != sdfs.end()) { - shaping.positionedGlyphs.emplace_back(chr, x, y); - x += it->second.metrics.advance + spacing; - } - } + std::vector reorderedLines = + processedText.applyLineBreaking(determineLineBreaks(logicalInput, spacing, maxWidth)); - if (shaping.positionedGlyphs.empty()) - return shaping; - - lineWrap(shaping, lineHeight, maxWidth, horizontalAlign, verticalAlign, justify, translate, - util::i18n::allowsIdeographicBreaking(string), writingDirection); + shapeLines(shaping, reorderedLines, spacing, lineHeight, horizontalAlign, verticalAlign, + justify, translate); return shaping; } -void align(Shaping &shaping, const float justify, const float horizontalAlign, - const float verticalAlign, const uint32_t maxLineLength, const float lineHeight, - const uint32_t line, const Point &translate) { - const float shiftX = (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24/* one em */); - const float shiftY = (-verticalAlign * (line + 1) + 0.5) * lineHeight + ::round(translate.y * 24/* one em */); +void align(Shaping& shaping, + const float justify, + const float horizontalAlign, + const float verticalAlign, + const float maxLineLength, + const float lineHeight, + const uint32_t lineCount, + const Point& translate) { + const float shiftX = + (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */); + const float shiftY = + (-verticalAlign * lineCount + 0.5) * lineHeight + ::round(translate.y * 24 /* one em */); for (auto& glyph : shaping.positionedGlyphs) { glyph.x += shiftX; @@ -74,9 +78,16 @@ void 
align(Shaping &shaping, const float justify, const float horizontalAlign, } } -void justifyLine(std::vector &positionedGlyphs, const std::map &sdfs, uint32_t start, - uint32_t end, float justify) { - PositionedGlyph &glyph = positionedGlyphs[end]; +// justify left = 0, right = 1, center = .5 +void justifyLine(std::vector& positionedGlyphs, + const std::map& sdfs, + uint32_t start, + uint32_t end, + float justify) { + if (!justify) + return; + + PositionedGlyph& glyph = positionedGlyphs[end]; auto it = sdfs.find(glyph.glyph); if (it != sdfs.end()) { const uint32_t lastAdvance = it->second.metrics.advance; @@ -88,81 +99,122 @@ void justifyLine(std::vector &positionedGlyphs, const std::map< } } -void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, float maxWidth, - const float horizontalAlign, const float verticalAlign, - const float justify, const Point &translate, - bool useBalancedIdeographicBreaking, const WritingDirection writingDirection) const { - float lineFeedOffset = writingDirection == WritingDirection::RightToLeft ? -lineHeight : lineHeight; - - uint32_t lastSafeBreak = 0; +float GlyphSet::determineIdeographicLineWidth(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const { + float totalWidth = 0; + + // totalWidth doesn't include the last character for magical tuning reasons. This makes the + // algorithm a little + // more aggressive about trying to fit the text into fewer lines, taking advantage of the + // tolerance for going a little + // over maxWidth + for (uint32_t i = 0; i < logicalInput.size() - 1; i++) { + auto it = sdfs.find(logicalInput[i]); + if (it != sdfs.end()) + totalWidth += it->second.metrics.advance + spacing; + } - uint32_t lengthBeforeCurrentLine = 0; - uint32_t lineStartIndex = 0; - uint32_t line = 0; + int32_t lineCount = std::fmax(1, std::ceil(totalWidth / maxWidth)); + return totalWidth / lineCount; +} + +// We determine line breaks based on shaped text in logical order.
Working in visual order would be +// more intuitive, but we can't do that because the visual order may be changed by line breaks! +std::set GlyphSet::determineLineBreaks(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const { + if (!maxWidth) + return {}; + + if (logicalInput.empty()) + return {}; - uint32_t maxLineLength = 0; + if (util::i18n::allowsIdeographicBreaking(logicalInput)) + maxWidth = determineIdeographicLineWidth(logicalInput, spacing, maxWidth); + + std::set lineBreakPoints; + float currentX = 0; + uint32_t lastSafeBreak = 0; + float lastSafeBreakX = 0; - std::vector &positionedGlyphs = shaping.positionedGlyphs; + for (uint32_t i = 0; i < logicalInput.size(); i++) { + auto it = sdfs.find(logicalInput[i]); + if (it == sdfs.end()) + continue; - if (maxWidth) { - if (useBalancedIdeographicBreaking) { - auto lastPositionedGlyph = positionedGlyphs[positionedGlyphs.size() - 1]; - uint32_t estimatedLineCount = std::fmax(1, std::ceil(lastPositionedGlyph.x / maxWidth)); - maxWidth = lastPositionedGlyph.x / estimatedLineCount; + const SDFGlyph& glyph = it->second; + + // Ideographic characters, spaces, and word-breaking punctuation that often appear without + // surrounding spaces. + if (util::i18n::allowsWordBreaking(glyph.id) || + util::i18n::allowsIdeographicBreaking(glyph.id)) { + lastSafeBreak = i; + lastSafeBreakX = currentX; } - for (uint32_t i = 0; i < positionedGlyphs.size(); i++) { - PositionedGlyph &shape = positionedGlyphs[i]; - - shape.x -= lengthBeforeCurrentLine; - shape.y += lineFeedOffset * line; - - if (shape.x > maxWidth && lastSafeBreak > 0) { - - uint32_t lineLength = positionedGlyphs[lastSafeBreak + 1].x; - maxLineLength = util::max(lineLength, maxLineLength); - - for (uint32_t k = lastSafeBreak + 1; k <= i; k++) { - positionedGlyphs[k].y += lineFeedOffset; - positionedGlyphs[k].x -= lineLength; - } - - if (justify) { - // Collapse invisible characters. 
- uint32_t breakGlyph = positionedGlyphs[lastSafeBreak].glyph; - uint32_t lineEnd = lastSafeBreak; - if (util::i18n::isVisible(breakGlyph)) { - lineEnd--; - } - - justifyLine(positionedGlyphs, sdfs, lineStartIndex, lineEnd, justify); - } - - lineStartIndex = lastSafeBreak + 1; - lastSafeBreak = 0; - lengthBeforeCurrentLine += lineLength; - line++; - } - - // Ideographic characters, spaces, and word-breaking punctuation that often appear without surrounding spaces. - if (useBalancedIdeographicBreaking - || util::i18n::allowsWordBreaking(shape.glyph) - || util::i18n::allowsIdeographicBreaking(shape.glyph)) { - lastSafeBreak = i; - } + if (currentX > maxWidth && lastSafeBreak > 0) { + lineBreakPoints.insert(lastSafeBreak); + currentX -= lastSafeBreakX; + lastSafeBreakX = 0; } + + currentX += glyph.metrics.advance + spacing; } - const PositionedGlyph& lastPositionedGlyph = positionedGlyphs.back(); - const auto lastGlyphIt = sdfs.find(lastPositionedGlyph.glyph); - assert(lastGlyphIt != sdfs.end()); - const uint32_t lastLineLength = lastPositionedGlyph.x + lastGlyphIt->second.metrics.advance; - maxLineLength = std::max(maxLineLength, lastLineLength); + return lineBreakPoints; +} + +void GlyphSet::shapeLines(Shaping& shaping, + const std::vector& lines, + const float spacing, + const float lineHeight, + const float horizontalAlign, + const float verticalAlign, + const float justify, + const Point& translate) const { + + // the y offset *should* be part of the font metadata + const int32_t yOffset = -17; + + float x = 0; + float y = yOffset; + + float maxLineLength = 0; - const uint32_t height = (line + 1) * lineHeight; + for (std::u16string line : lines) { + // Collapse whitespace so it doesn't throw off justification + boost::algorithm::trim_if(line, boost::algorithm::is_any_of(u" \t\n\v\f\r")); + + if (line.empty()) + continue; + + uint32_t lineStartIndex = static_cast(shaping.positionedGlyphs.size()); + for (char16_t chr : line) { + auto it = sdfs.find(chr); + if (it 
== sdfs.end()) + continue; + + const SDFGlyph& glyph = it->second; + shaping.positionedGlyphs.emplace_back(chr, x, y); + x += glyph.metrics.advance + spacing; + } + + if (static_cast(shaping.positionedGlyphs.size()) == lineStartIndex) + continue; + + maxLineLength = util::max(x, maxLineLength); + + justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex, + static_cast(shaping.positionedGlyphs.size()) - 1, justify); + + x = 0; + y += lineHeight; // Move to next line + } - justifyLine(positionedGlyphs, sdfs, lineStartIndex, uint32_t(positionedGlyphs.size()) - 1, justify); - align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, line, translate); + align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight, + static_cast(lines.size()), translate); + const uint32_t height = lines.size() * lineHeight; // Calculate the bounding box shaping.top += -verticalAlign * height; diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp index b4fcf4c3a4..b48973b6ea 100644 --- a/src/mbgl/text/glyph_set.hpp +++ b/src/mbgl/text/glyph_set.hpp @@ -9,15 +9,34 @@ namespace mbgl { class GlyphSet { public: void insert(uint32_t id, SDFGlyph&&); - const std::map &getSDFs() const; - const Shaping getShaping(const std::u16string &string, const WritingDirection writingDirection, float maxWidth, float lineHeight, - float horizontalAlign, float verticalAlign, float justify, - float spacing, const Point &translate) const; - void lineWrap(Shaping &shaping, float lineHeight, float maxWidth, float horizontalAlign, - float verticalAlign, float justify, const Point &translate, - bool useBalancedIdeographicBreaking, const WritingDirection writingDirection) const; + const std::map& getSDFs() const; + const Shaping getShaping(const std::u16string& string, + float maxWidth, + float lineHeight, + float horizontalAlign, + float verticalAlign, + float justify, + float spacing, + const Point& translate, + BiDi& bidi) const; private: + float 
determineIdeographicLineWidth(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const; + std::set determineLineBreaks(const std::u16string& logicalInput, + const float spacing, + float maxWidth) const; + + void shapeLines(Shaping& shaping, + const std::vector& lines, + const float spacing, + float lineHeight, + float horizontalAlign, + float verticalAlign, + float justify, + const Point& translate) const; + std::map sdfs; }; -- cgit v1.2.1