From 2c9792d4b435e87e8616c22e1e5516d7302b06dc Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Wed, 25 Aug 2021 00:09:37 -0400 Subject: Refine hyphenation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace ‧ and | with a - when we break there. Update affected test output. Fixes: #603 --- pango/break.c | 15 +++++++++++-- pango/pango-break.h | 3 +++ pango/pango-layout.c | 50 ++++++++++++++++++++++++++++++++++------- pango/shape.c | 34 +++++++++++++++++++--------- tests/layouts/valid-17.expected | 2 +- 5 files changed, 83 insertions(+), 21 deletions(-) diff --git a/pango/break.c b/pango/break.c index 8e1aeb56..043ac0cc 100644 --- a/pango/break.c +++ b/pango/break.c @@ -1559,10 +1559,14 @@ default_break (const char *text, } /* --- Hyphens --- */ + { gboolean insert_hyphens; gboolean space_or_hyphen = FALSE; + attrs[i].break_inserts_hyphen = FALSE; + attrs[i].break_removes_preceding = FALSE; + switch ((int)script) { case PANGO_SCRIPT_COMMON: @@ -1599,7 +1603,6 @@ default_break (const char *text, wc == 0x1400 || /* Canadian syllabics hyphen */ wc == 0x1806 || /* Mongolian todo hyphen */ wc == 0x2010 || /* Hyphen */ - wc == 0x2027 || /* Hyphenation point */ wc == 0x2e17 || /* Double oblique hyphen */ wc == 0x2e40 || /* Double hyphen */ wc == 0x30a0 || /* Katakana-Hiragana double hyphen */ @@ -1617,6 +1620,13 @@ default_break (const char *text, else attrs[i].break_inserts_hyphen = insert_hyphens; + if (prev_wc == 0x007C || /* Vertical Line */ + prev_wc == 0x2027) /* Hyphenation point */ + { + attrs[i].break_inserts_hyphen = TRUE; + attrs[i].break_removes_preceding = TRUE; + } + prev_space_or_hyphen = space_or_hyphen; } @@ -1774,7 +1784,8 @@ break_attrs (const char *text, for (pos = start_pos + 1; pos < end_pos; pos++) { - log_attrs[pos].break_inserts_hyphen = FALSE; + if (!log_attrs[pos].break_removes_preceding) + log_attrs[pos].break_inserts_hyphen = FALSE; } } } while (pango_attr_iterator_next (&iter)); diff --git 
a/pango/pango-break.h b/pango/pango-break.h index 52febd3d..5d791e27 100644 --- a/pango/pango-break.h +++ b/pango/pango-break.h @@ -74,6 +74,8 @@ G_BEGIN_DECLS * semantics. (Since: 1.22) * @break_inserts_hyphen: when breaking lines before this char, insert a hyphen. * Since: 1.50 + * @break_removes_preceding: when breaking lines before this char, remove the + * preceding char. Since: 1.50 * * The `PangoLogAttr` structure stores information about the attributes of a * single character. @@ -94,6 +96,7 @@ struct _PangoLogAttr guint is_expandable_space : 1; guint is_word_boundary : 1; guint break_inserts_hyphen : 1; + guint break_removes_preceding : 1; }; PANGO_DEPRECATED_IN_1_44 diff --git a/pango/pango-layout.c b/pango/pango-layout.c index 4366450a..1ebe42cd 100644 --- a/pango/pango-layout.c +++ b/pango/pango-layout.c @@ -3601,7 +3601,8 @@ break_needs_hyphen (PangoLayout *layout, ParaBreakState *state, int pos) { - return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen; + return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen || + layout->log_attrs[state->start_offset + pos].break_removes_preceding; } static int @@ -3626,24 +3627,57 @@ find_hyphen_width (PangoItem *item) return 0; } +static int +find_char_width (PangoItem *item, + gunichar wc) +{ + hb_font_t *hb_font; + hb_codepoint_t glyph; + + if (!item->analysis.font) + return 0; + + hb_font = pango_font_get_hb_font (item->analysis.font); + if (hb_font_get_nominal_glyph (hb_font, wc, &glyph)) + return hb_font_get_glyph_h_advance (hb_font, glyph); + + return 0; +} + +static inline void +ensure_hyphen_width (ParaBreakState *state) +{ + if (state->hyphen_width < 0) + { + PangoItem *item = state->items->data; + state->hyphen_width = find_hyphen_width (item); + } +} + static int find_break_extra_width (PangoLayout *layout, ParaBreakState *state, int pos) { /* Check whether to insert a hyphen */ - if (break_needs_hyphen (layout, state, pos)) + if (layout->log_attrs[state->start_offset 
+ pos].break_inserts_hyphen) { - if (state->hyphen_width < 0) + ensure_hyphen_width (state); + + if (layout->log_attrs[state->start_offset + pos].break_removes_preceding) { PangoItem *item = state->items->data; - state->hyphen_width = find_hyphen_width (item); - } + gunichar wc; - return state->hyphen_width; + wc = g_utf8_get_char (g_utf8_offset_to_pointer (layout->text, state->start_offset + pos - 1)); + + return state->hyphen_width - find_char_width (item, wc); + } + else + return state->hyphen_width; } - else - return 0; + + return 0; } #if 0 diff --git a/pango/shape.c b/pango/shape.c index 707534ed..62c0f025 100644 --- a/pango/shape.c +++ b/pango/shape.c @@ -344,6 +344,7 @@ pango_hb_shape (const char *item_text, int paragraph_length, const PangoAnalysis *analysis, PangoLogAttr *log_attrs, + int num_chars, PangoGlyphString *glyphs, PangoShapeFlags flags) { @@ -362,6 +363,7 @@ pango_hb_shape (const char *item_text, unsigned int num_features = 0; PangoGlyphInfo *infos; PangoTextTransform transform; + int hyphen_index; g_return_if_fail (analysis != NULL); g_return_if_fail (analysis->font != NULL); @@ -392,6 +394,17 @@ pango_hb_shape (const char *item_text, hb_buffer_set_flags (hb_buffer, hb_buffer_flags); hb_buffer_set_invisible_glyph (hb_buffer, PANGO_GLYPH_EMPTY); + if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN) + { + const char *p = paragraph_text + item_offset + item_length; + int last_char_len = p - g_utf8_prev_char (p); + + hyphen_index = item_offset + item_length - last_char_len; + + if (log_attrs[num_chars].break_removes_preceding) + item_length -= last_char_len; + } + /* Add pre-context */ hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0); @@ -407,7 +420,9 @@ pango_hb_shape (const char *item_text, /* Transform the item text according to text transform. 
* Note: we assume text transforms won't cross font boundaries */ - for (p = paragraph_text + item_offset, i = 0; p < paragraph_text + item_offset + item_length; p = g_utf8_next_char (p), i++) + for (p = paragraph_text + item_offset, i = 0; + p < paragraph_text + item_offset + item_length; + p = g_utf8_next_char (p), i++) { int index = p - paragraph_text; gunichar ch = g_utf8_get_char (p); @@ -457,15 +472,13 @@ pango_hb_shape (const char *item_text, /* Insert either a Unicode or ASCII hyphen. We may * want to look for script-specific hyphens here. */ - const char *p = paragraph_text + item_offset + item_length; - int last_char_len = p - g_utf8_prev_char (p); hb_codepoint_t glyph; /* Note: We rely on hb_buffer_add clearing existing post-context */ if (hb_font_get_nominal_glyph (hb_font, 0x2010, &glyph)) - hb_buffer_add (hb_buffer, 0x2010, item_offset + item_length - last_char_len); + hb_buffer_add (hb_buffer, 0x2010, hyphen_index); else if (hb_font_get_nominal_glyph (hb_font, '-', &glyph)) - hb_buffer_add (hb_buffer, '-', item_offset + item_length - last_char_len); + hb_buffer_add (hb_buffer, '-', hyphen_index); } pango_font_get_features (analysis->font, features, G_N_ELEMENTS (features), &num_features); @@ -578,6 +591,7 @@ pango_shape_internal (const char *item_text, int paragraph_length, const PangoAnalysis *analysis, PangoLogAttr *log_attrs, + int num_chars, PangoGlyphString *glyphs, PangoShapeFlags flags) { @@ -605,9 +619,8 @@ pango_shape_internal (const char *item_text, pango_hb_shape (item_text, item_length, paragraph_text, paragraph_length, analysis, - log_attrs, - glyphs, - flags); + log_attrs, num_chars, + glyphs, flags); if (G_UNLIKELY (glyphs->num_glyphs == 0)) { @@ -866,7 +879,7 @@ pango_shape_with_flags (const char *item_text, { pango_shape_internal (item_text, item_length, paragraph_text, paragraph_length, - analysis, NULL, + analysis, NULL, 0, glyphs, flags); } @@ -905,7 +918,8 @@ pango_shape_item (PangoItem *item, { pango_shape_internal 
(paragraph_text + item->offset, item->length, paragraph_text, paragraph_length, - &item->analysis, log_attrs, + &item->analysis, + log_attrs, item->num_chars, glyphs, flags); } diff --git a/tests/layouts/valid-17.expected b/tests/layouts/valid-17.expected index 4b3192fb..a2b7d494 100644 --- a/tests/layouts/valid-17.expected +++ b/tests/layouts/valid-17.expected @@ -28,7 +28,7 @@ i=3, index=17, paragraph-start=1, dir=ltr '' --- runs -i=1, index=0, chars=13, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|' +i=1, index=0, chars=13, level=0, gravity=south, flags=4, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|' i=2, index=13, no run, line end i=3, index=13, chars=3, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'bla' i=4, index=16, no run, line end -- cgit v1.2.1