diff options
author | Peng Wu <alexepico@gmail.com> | 2018-08-13 16:48:59 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2018-11-20 10:00:59 +0800 |
commit | 1ad4ac6065a405d0c6033633fd1b9ee019745d1c (patch) | |
tree | e098608e52af161c0945c8477649c905e8743aff /pango | |
parent | 01cc0a0029dca14dac815964f7be4c0789ec809b (diff) | |
download | pango-1ad4ac6065a405d0c6033633fd1b9ee019745d1c.tar.gz |
Update Grapheme Boundary to Unicode 11
Diffstat (limited to 'pango')
-rw-r--r-- | pango/break.c | 129 | ||||
-rw-r--r-- | pango/pango-emoji-private.h | 3 | ||||
-rw-r--r-- | pango/pango-emoji.c | 6 |
3 files changed, 42 insertions, 96 deletions
diff --git a/pango/break.c b/pango/break.c index 2e66972e..c50c18eb 100644 --- a/pango/break.c +++ b/pango/break.c @@ -24,6 +24,7 @@ #include "pango-break.h" #include "pango-engine-private.h" #include "pango-script-private.h" +#include "pango-emoji-private.h" #include "pango-impl-utils.h" #include <string.h> @@ -194,16 +195,12 @@ pango_default_break (const gchar *text, GB_SpacingMark, GB_InHangulSyllable, /* Handles all of L, V, T, LV, LVT rules */ /* Use state machine to handle emoji sequence */ - /* Rule GB10 and GB11 */ - GB_E_Base, - GB_E_Modifier, - GB_Glue_After_Zwj, - GB_E_Base_GAZ, /* Rule GB12 and GB13 */ GB_RI_Odd, /* Meets odd number of RI */ GB_RI_Even, /* Meets even number of RI */ } GraphemeBreakType; GraphemeBreakType prev_GB_type = GB_Other; + gboolean met_Extended_Pictographic = FALSE; /* See Word_Break Property Values table of UAX#29 */ typedef enum @@ -371,10 +368,14 @@ pango_default_break (const gchar *text, /* Just few spaces have variable width. So explicitly mark them. */ attrs[i].is_expandable_space = (0x0020 == wc || 0x00A0 == wc); + gboolean is_Extended_Pictographic = + _pango_Is_Emoji_Extended_Pictographic (wc); + /* ---- UAX#29 Grapheme Boundaries ---- */ { GraphemeBreakType GB_type; + /* Find the GraphemeBreakType of wc */ GB_type = GB_Other; switch ((int) type) @@ -447,70 +448,6 @@ pango_default_break (const gchar *text, break; case G_UNICODE_OTHER_SYMBOL: - if (G_UNLIKELY(wc == 0x261D || - wc == 0x26F9 || - (wc >= 0x270A && wc <= 0x270D) || - wc == 0x1F385 || - (wc >= 0x1F3C2 && wc <= 0x1F3C4) || - wc == 0x1F3C7 || - (wc >= 0x1F3CA && wc <= 0x1F3CC) || - (wc >= 0x1F442 && wc <= 0x1F443) || - (wc >= 0x1F446 && wc <= 0x1F450) || - wc == 0x1F46E || - (wc >= 0x1F470 && wc <= 0x1F478) || - wc == 0x1F47C || - (wc >= 0x1F481 && wc <= 0x1F483) || - (wc >= 0x1F485 && wc <= 0x1F487) || - wc == 0x1F4AA || - (wc >= 0x1F574 && wc <= 0x1F575) || - wc == 0x1F57A || - wc == 0x1F590 || - (wc >= 0x1F595 && wc <= 0x1F596) || - (wc >= 0x1F645 && wc <= 0x1F647) || - (wc >= 0x1F64B && wc <= 0x1F64F) || - wc == 0x1F6A3 || - (wc >= 0x1F6B4 && wc <= 0x1F6B6) || - wc == 0x1F6C0 || - wc == 0x1F6CC || - (wc >= 0x1F918 && wc <= 0x1F91C) || - (wc >= 0x1F91E && wc <= 0x1F91F) || - wc == 0x1F926 || - (wc >= 0x1F930 && wc <= 0x1F939) || - (wc >= 0x1F93D && wc <= 0x1F93E) || - (wc >= 0x1F9D1 && wc <= 0x1F9DD))) - { - GB_type = GB_E_Base; - break; - } - if (G_UNLIKELY(wc == 0x2640 || - wc == 0x2642 || - (wc >= 0x2695 && wc <= 0x2696) || - wc == 0x2708 || - wc == 0x2764 || - wc == 0x1F308 || - wc == 0x1F33E || - wc == 0x1F373 || - wc == 0x1F393 || - wc == 0x1F3A4 || - wc == 0x1F3A8 || - wc == 0x1F3EB || - wc == 0x1F3ED || - wc == 0x1F48B || - (wc >= 0x1F4BB && wc <= 0x1F4BC) || - wc == 0x1F527 || - wc == 0x1F52C || - wc == 0x1F5E8 || - wc == 0x1F680 || - wc == 0x1F692)) - { - GB_type = GB_Glue_After_Zwj; - break; - } - if (G_UNLIKELY(wc >= 0x1F466 && wc <= 0x1F469)) - { - GB_type = GB_E_Base_GAZ; - break; - } if (G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) { if (prev_GB_type == GB_RI_Odd) @@ -525,11 +462,28 @@ pango_default_break (const gchar *text, case G_UNICODE_MODIFIER_SYMBOL: if (wc >= 0x1F3FB && wc <= 0x1F3FF) - GB_type = GB_E_Modifier; + GB_type = GB_Extend; break; } + /* Rule GB11 */ + if (met_Extended_Pictographic) + { + if (GB_type == GB_Extend) + met_Extended_Pictographic = TRUE; + else if (_pango_Is_Emoji_Extended_Pictographic (prev_wc) && + GB_type == GB_ZWJ) + met_Extended_Pictographic = TRUE; + else if (prev_GB_type == GB_Extend && GB_type == GB_ZWJ) + met_Extended_Pictographic = TRUE; + else if (prev_GB_type == GB_ZWJ && is_Extended_Pictographic) + met_Extended_Pictographic = TRUE; + else + met_Extended_Pictographic = FALSE; + } + /* Grapheme Cluster Boundary Rules */ + is_grapheme_boundary = TRUE; /* Rule GB999 */ /* We apply Rules GB1 and GB2 at the end of the function */ if (wc == '\n' && prev_wc == '\r') @@ -540,9 +494,6 @@ pango_default_break (const gchar *text, is_grapheme_boundary = FALSE; /* Rules GB6, GB7, GB8 */ else if (GB_type == GB_Extend) { - /* Rule GB10 */ - if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ) - GB_type = prev_GB_type; is_grapheme_boundary = FALSE; /* Rule GB9 */ } else if (GB_type == GB_ZWJ) @@ -551,37 +502,23 @@ pango_default_break (const gchar *text, is_grapheme_boundary = FALSE; /* Rule GB9a */ else if (prev_GB_type == GB_Prepend) is_grapheme_boundary = FALSE; /* Rule GB9b */ - /* Rule GB10 */ - else if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ) - { - if (GB_type == GB_E_Modifier) - is_grapheme_boundary = FALSE; - else - is_grapheme_boundary = TRUE; - } - else if (prev_GB_type == GB_ZWJ && - (GB_type == GB_Glue_After_Zwj || GB_type == GB_E_Base_GAZ)) - is_grapheme_boundary = FALSE; /* Rule GB11 */ + else if (is_Extended_Pictographic) + { /* Rule GB11 */ + if (prev_GB_type == GB_ZWJ && met_Extended_Pictographic) + is_grapheme_boundary = FALSE; + } else if (prev_GB_type == GB_RI_Odd && GB_type == GB_RI_Even) is_grapheme_boundary = FALSE; /* Rule GB12 and GB13 */ - else - is_grapheme_boundary = TRUE; /* Rule GB999 */ + + if (is_Extended_Pictographic) + met_Extended_Pictographic = TRUE; attrs[i].is_cursor_position = is_grapheme_boundary; /* If this is a grapheme boundary, we have to decide if backspace * deletes a character or the whole grapheme cluster */ if (is_grapheme_boundary) { - if (prev_GB_type == GB_E_Base || - prev_GB_type == GB_E_Base_GAZ || - prev_GB_type == GB_Glue_After_Zwj || - prev_GB_type == GB_Extend || - prev_GB_type == GB_E_Modifier || - prev_GB_type == GB_RI_Odd || - prev_GB_type == GB_RI_Even) - attrs[i].backspace_deletes_character = FALSE; - else - attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character); + attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character); } else attrs[i].backspace_deletes_character = FALSE; diff --git a/pango/pango-emoji-private.h b/pango/pango-emoji-private.h index a360b37a..ed4b7213 100644 --- a/pango/pango-emoji-private.h +++ b/pango/pango-emoji-private.h @@ -24,6 +24,9 @@ #include <glib.h> +gboolean +_pango_Is_Emoji_Extended_Pictographic (gunichar ch); + typedef struct _PangoEmojiIter PangoEmojiIter; struct _PangoEmojiIter diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c index 46ab5b3f..158daa5b 100644 --- a/pango/pango-emoji.c +++ b/pango/pango-emoji.c @@ -92,7 +92,13 @@ DEFINE_pango_Is_(Emoji) DEFINE_pango_Is_(Emoji_Presentation) DEFINE_pango_Is_(Emoji_Modifier) DEFINE_pango_Is_(Emoji_Modifier_Base) +DEFINE_pango_Is_(Extended_Pictographic) +gboolean +_pango_Is_Emoji_Extended_Pictographic (gunichar ch) +{ + return _pango_Is_Extended_Pictographic (ch); +} static gboolean _pango_Is_Emoji_Text_Default (gunichar ch) |